matplotlib

matplotlib

测试用数据:

unrate.csv:https://pan.baidu.com/s/1uVCVvYKfcphjqcbMpLBdkg

fandango_scores.csv:https://pan.baidu.com/s/1jareiLJC0YzNKgTOUxdbqQ

percent-bachelors-degrees-women-usa.csv:https://pan.baidu.com/s/1oPtYASsjEoZbD8PEwNWFrw

train.csv:https://pan.baidu.com/s/1Y2NaPDYtRxFWJABd1YxxJg

import pandas as pd                                     

unrate = pd.read_csv('unrate.csv')
#unrate['DATE'] = pd.to_datetime(unrate['DATE'])
print(unrate.head(12))
  •           DATE  VALUE
    0 1948-01-01 3.4
    1 1948-02-01 3.8
    2 1948-03-01 4.0
    3 1948-04-01 3.9
    4 1948-05-01 3.5
    5 1948-06-01 3.6
    6 1948-07-01 3.6
    7 1948-08-01 3.9
    8 1948-09-01 3.8
    9 1948-10-01 3.7
    10 1948-11-01 3.8
    11 1948-12-01 4.0
import matplotlib.pyplot as plt

plt.plot()
plt.show()

first_twelve = unrate[0:12]
plt.plot(first_twelve['DATE'], first_twelve['VALUE'])
plt.show()

plt.plot(first_twelve['DATE'], first_twelve['VALUE'])
plt.xticks(rotation=45)
# print help(plt.xticks)
plt.show()

plt.figure(figsize=(10, 10))
plt.plot(first_twelve['DATE'], first_twelve['VALUE'])
plt.xticks(rotation=90)
plt.xlabel('Month')
plt.ylabel('Unemployment Rate')
plt.title('Monthly Unemployment Trends, 1948')
plt.show()

import matplotlib.pyplot as plt
fig = plt.figure()
ax1 = fig.add_subplot(3,2,1)
ax2 = fig.add_subplot(3,2,2)
ax6 = fig.add_subplot(3,2,6)
plt.show()

import numpy as np
fig = plt.figure()
#fig = plt.figure(figsize=(3, 3))
ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)

ax1.plot(np.random.randint(1,5,5), np.arange(5))
ax2.plot(np.arange(10)*3, np.arange(10))
plt.show()

print(unrate['DATE'])
unrate['DATE'] = pd.to_datetime(unrate['DATE'])
print(unrate['DATE'])
#unrate['MONTH'] = unrate['DATE'].dt.month
unrate['MONTH'] = unrate['DATE'].dt.month
fig = plt.figure(figsize=(6,3))

plt.plot(unrate[0:12]['MONTH'], unrate[0:12]['VALUE'], c='red')
plt.plot(unrate[12:24]['MONTH'], unrate[12:24]['VALUE'], c='blue')

plt.show()

fig = plt.figure(figsize=(10,6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i in range(5):
start_index = i*12
end_index = (i+1)*12
subset = unrate[start_index:end_index]
plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i])

plt.show()

fig = plt.figure(figsize=(10,6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i in range(5):
start_index = i*12
end_index = (i+1)*12
subset = unrate[start_index:end_index]
label = str(1948 + i)
plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i], label=label)
plt.legend(loc='best')
#print (help(plt.legend))
plt.show()

fig = plt.figure(figsize=(10,6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i in range(5):
start_index = i*12
end_index = (i+1)*12
subset = unrate[start_index:end_index]
label = str(1948 + i)
plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i], label=label)
plt.legend(loc='upper left')
plt.xlabel('Month, Integer')
plt.ylabel('Unemployment Rate, Percent')
plt.title('Monthly Unemployment Trends, 1948-1952')

plt.show()

import pandas as pd
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
print(norm_reviews[:1])
  •                              FILM  RT_user_norm  Metacritic_user_nom  \
    0 Avengers: Age of Ultron (2015) 4.3 3.55

    IMDB_norm Fandango_Ratingvalue Fandango_Stars
    0 3.9 4.5 5.0
import matplotlib.pyplot as plt
from numpy import arange
#The Axes.bar() method has 2 required parameters, left and height.
#We use the left parameter to specify the x coordinates of the left sides of the bar.
#We use the height parameter to specify the height of each bar
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

bar_heights = norm_reviews.loc[0, num_cols].values
#print (bar_heights)
bar_positions = arange(5) + 0.75
#print (bar_positions)
fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, 0.5)
plt.show()

num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_heights = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1,6)
fig, ax = plt.subplots()
# print(fig)
# print(ax)
# print(help(plt.subplots))
ax.bar(bar_positions, bar_heights, 0.5)
ax.set_xticks(tick_positions)
ax.set_xticklabels(num_cols, rotation=90)

ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

import matplotlib.pyplot as plt
from numpy import arange
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

bar_widths = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1,6)
fig, ax = plt.subplots()
ax.barh(bar_positions, bar_widths, 0.5)

ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

fig, ax = plt.subplots()
ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')
plt.show()

#Switching Axes
fig = plt.figure(figsize=(5,10))
ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)
ax1.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')
ax2.scatter(norm_reviews['RT_user_norm'], norm_reviews['Fandango_Ratingvalue'])
ax2.set_xlabel('Rotten Tomatoes')
ax2.set_ylabel('Fandango')
plt.show()

fig, ax = plt.subplots()
#ax.hist(norm_reviews['Fandango_Ratingvalue'])
#ax.hist(norm_reviews['Fandango_Ratingvalue'],bins=20)
ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5),bins=20) #4-5 其实结束
plt.show()

fig = plt.figure(figsize=(5,20))
ax1 = fig.add_subplot(4,1,1)
ax2 = fig.add_subplot(4,1,2)
ax3 = fig.add_subplot(4,1,3)
ax4 = fig.add_subplot(4,1,4)
ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
ax1.set_title('Distribution of Fandango Ratings')
ax1.set_ylim(0, 50)

ax2.hist(norm_reviews['RT_user_norm'], 20, range=(0, 5))
ax2.set_title('Distribution of Rotten Tomatoes Ratings')
ax2.set_ylim(0, 50)

ax3.hist(norm_reviews['Metacritic_user_nom'], 20, range=(0, 5))
ax3.set_title('Distribution of Metacritic Ratings')
ax3.set_ylim(0, 50)

ax4.hist(norm_reviews['IMDB_norm'], 20, range=(0, 5))
ax4.set_title('Distribution of IMDB Ratings')
ax4.set_ylim(0, 50)

plt.show()

fig, ax = plt.subplots()
ax.boxplot(norm_reviews['RT_user_norm'])
ax.set_xticklabels(['Rotten Tomatoes'])
ax.set_ylim(0, 5)
plt.show()

num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
fig, ax = plt.subplots()
ax.boxplot(norm_reviews[num_cols].values) #箱线图
ax.set_xticklabels(num_cols, rotation=90)
ax.set_ylim(0,5)
plt.show()

women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')
plt.plot(women_degrees['Year'], women_degrees['Biology'])
plt.show()

plt.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
plt.plot(women_degrees['Year'], 100-women_degrees['Biology'], c='green', label='Men')
plt.legend(loc='upper right')
plt.title('Percentage of Biology Degrees Awarded By Gender')
plt.show()

fig, ax = plt.subplots()

ax.tick_params(bottom="off", top='on', left="off", right="on")
fig, ax = plt.subplots()

ax.plot(women_degrees['Year'], women_degrees['Biology'], label='Women')
ax.plot(women_degrees['Year'], 100-women_degrees['Biology'], label='Men')

ax.tick_params(bottom="off", top="off", left="off", right="off")
ax.set_title('Percentage of Biology Degrees Awarded By Gender')
ax.legend(loc="upper right")

plt.show()

fig, ax = plt.subplots()
ax.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
ax.plot(women_degrees['Year'], 100-women_degrees['Biology'], c='green', label='Men')
ax.tick_params(bottom="off", top="off", left="off", right="off")
print(type(ax.spines))
for key,spine in ax.spines.items():
spine.set_visible(False)


ax.legend(loc='upper right')
plt.show()

major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

for sp in range(0,4):
ax = fig.add_subplot(2,2,sp+1)
ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c='blue', label='Women')
ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c='green', label='Men')



plt.legend(loc='upper right')
plt.show()

#Color
import pandas as pd
import matplotlib.pyplot as plt

women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']


cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

fig = plt.figure(figsize=(12, 12))

for sp in range(0,4):
ax = fig.add_subplot(2,2,sp+1)
# The color for each line is assigned here.
ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women')
ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c=cb_orange, label='Men')
for key,spine in ax.spines.items():
spine.set_visible(False)
ax.set_xlim(1968, 2011)
ax.set_ylim(0,100)
ax.set_title(major_cats[sp])
ax.tick_params(bottom="off", top="off", left="off", right="off")

plt.legend(loc='upper right')
plt.show()

#Setting Line Width
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

fig = plt.figure(figsize=(12, 12))

for sp in range(0,4):
ax = fig.add_subplot(2,2,sp+1)
# Set the line width when specifying how each line should look.
ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=10)
ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c=cb_orange, label='Men', linewidth=10)
for key,spine in ax.spines.items():
spine.set_visible(False)
ax.set_xlim(1968, 2011)
ax.set_ylim(0,100)
ax.set_title(major_cats[sp])
ax.tick_params(bottom="off", top="off", left="off", right="off")

plt.legend(loc='upper right')
plt.show()

stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
fig = plt.figure(figsize=(18, 3))

for sp in range(0,6):
ax = fig.add_subplot(1,6,sp+1)
ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
ax.plot(women_degrees['Year'], 100-women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
for key,spine in ax.spines.items():
spine.set_visible(False)
ax.set_xlim(1968,2011)
ax.set_ylim(0,100)
ax.set_title(stem_cats[sp])
ax.tick_params(bottom="off", top="off", left="off", right="off")

plt.legend(loc='upper right')
plt.show()

fig = plt.figure(figsize=(18, 3))

for sp in range(0,6):
ax = fig.add_subplot(1,6,sp+1)
ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
ax.plot(women_degrees['Year'], 100-women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
for key,spine in ax.spines.items():
spine.set_visible(False)
ax.set_xlim(1968, 2011)
ax.set_ylim(0,100)
ax.set_title(stem_cats[sp])
ax.tick_params(bottom="off", top="off", left="off", right="off")
plt.legend(loc='upper right')
plt.show()
fig = plt.figure(figsize=(18, 3))

for sp in range(0,6):
ax = fig.add_subplot(1,6,sp+1)
ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
ax.plot(women_degrees['Year'], 100-women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
for key,spine in ax.spines.items():
spine.set_visible(False)
ax.set_xlim(1968, 2011)
ax.set_ylim(0,100)
ax.set_title(stem_cats[sp])
ax.tick_params(bottom="off", top="off", left="off", right="off")

if sp == 0:
ax.text(2005, 87, 'Men')
ax.text(2002, 8, 'Women')
elif sp == 5:
ax.text(2005, 62, 'Men')
ax.text(2001, 35, 'Women')
plt.show()

seaborn

import pandas as pd
titanic = pd.read_csv('train.csv')
cols = ['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
titanic = titanic[cols].dropna()
import seaborn as sns
import matplotlib.pyplot as plt
sns.distplot(titanic['Age'])
plt.show()

import seaborn as sns
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
def sinplot(flip=1):
x = np.linspace(0, 14, 100)
for i in range(1, 7):
plt.plot(x, np.sin(x + i * .5) * (7 - i) * flip)
sinplot()

sns.set()
sinplot()

5种主题风格

  • darkgrid
  • whitegrid
  • dark
  • white
  • ticks
sns.set_style("whitegrid")
data = np.random.normal(size=(20, 6)) + np.arange(6) / 2
sns.boxplot(data=data)

sns.set_style("dark")
sinplot()

sns.set_style("white")
sinplot()

sns.set_style("ticks")
sinplot()

sinplot()
sns.despine()

sns.violinplot(data)
sns.despine(offset=10,trim=True) #轴线的距离
# sns.violinplot(data=data)
# sns.despine(offset=10, trim=True);

sns.set_style("whitegrid")
sns.boxplot(data=data,palette="deep")
sns.despine(left=True)

with sns.axes_style("darkgrid"):
plt.subplot(211)
sinplot()
plt.subplot(212)
sinplot(-1)

sns.set()
sns.set_context("paper")
plt.figure(figsize=(8, 6))
sinplot()

sns.set_context("talk")
plt.figure(figsize=(8, 6))
sinplot()

sns.set_context("poster")
plt.figure(figsize=(8, 6))
sinplot()

sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
sinplot()

调色板

  • 颜色很重要
  • color_palette()能传入任何Matplotlib所支持的颜色
  • color_palette()不写参数则默认颜色
  • set_palette()设置所有图的颜色

分类色板

current_palette = sns.color_palette()
sns.palplot(current_palette)

圆形画板

当你有六个以上的分类要区分时,最简单的方法就是在一个圆形的颜色空间中画出均匀间隔的颜色(这样的色调会保持亮度和饱和度不变)。这是大多数的当他们需要使用比当前默认颜色循环中设置的颜色更多时的默认方案。

最常用的方法是使用hls的颜色空间,这是RGB值的一个简单转换。

sns.palplot(sns.color_palette("hls", 8))

data = np.random.normal(size=(20, 8)) + np.arange(8) / 2
sns.boxplot(data=data,palette=sns.color_palette("hls", 8))

hls_palette()函数来控制颜色的亮度和饱和

  • l-亮度 lightness
  • s-饱和 saturation
sns.palplot(sns.hls_palette(8, l=.7, s=.9))

sns.palplot(sns.color_palette("Paired",8))

使用xkcd颜色来命名颜色

xkcd包含了一套众包努力的针对随机RGB色的命名。产生了954个可以随时通过xdcd_rgb字典中调用的命名颜色。

https://xkcd.com/color/rgb/

plt.plot([0, 1], [0, 1], sns.xkcd_rgb["pale red"], lw=3)           #https://xkcd.com/color/rgb/
plt.plot([0, 1], [0, 2], sns.xkcd_rgb["medium green"], lw=3)
plt.plot([0, 1], [0, 3], sns.xkcd_rgb["denim blue"], lw=3)
plt.plot([0, 1], [0, 4], sns.xkcd_rgb["olive"],lw = 5)

colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
sns.palplot(sns.xkcd_palette(colors))

连续色板

色彩随数据变换,比如数据越来越重要则颜色越来越深


Accent, Accent_r, Blues, Blues_r, BrBG, BrBG_r, BuGn, BuGn_r, BuPu, BuPu_r, CMRmap, CMRmap_r, Dark2, Dark2_r, GnBu, GnBu_r, Greens, Greens_r, Greys, Greys_r, OrRd, OrRd_r, Oranges, Oranges_r, PRGn, PRGn_r, Paired, Paired_r, Pastel1, Pastel1_r, Pastel2, Pastel2_r, PiYG, PiYG_r, PuBu, PuBuGn, PuBuGn_r, PuBu_r, PuOr, PuOr_r, PuRd, PuRd_r, Purples, Purples_r, RdBu, RdBu_r, RdGy, RdGy_r, RdPu, RdPu_r, RdYlBu, RdYlBu_r, RdYlGn, RdYlGn_r, Reds, Reds_r, Set1, Set1_r, Set2, Set2_r, Set3, Set3_r, Spectral, Spectral_r, Vega10, Vega10_r, Vega20, Vega20_r, Vega20b, Vega20b_r, Vega20c, Vega20c_r, Wistia, Wistia_r, YlGn, YlGnBu, YlGnBu_r, YlGn_r, YlOrBr, YlOrBr_r, YlOrRd, YlOrRd_r, afmhot, afmhot_r, autumn, autumn_r, binary, binary_r, bone, bone_r, brg, brg_r, bwr, bwr_r, cool, cool_r, coolwarm, coolwarm_r, copper, copper_r, cubehelix, cubehelix_r, flag, flag_r, gist_earth, gist_earth_r, gist_gray, gist_gray_r, gist_heat, gist_heat_r, gist_ncar, gist_ncar_r, gist_rainbow, gist_rainbow_r, gist_stern, gist_stern_r, gist_yarg, gist_yarg_r, gnuplot, gnuplot2, gnuplot2_r, gnuplot_r, gray, gray_r, hot, hot_r, hsv, hsv_r, icefire, icefire_r, inferno, inferno_r, jet, jet_r, magma, magma_r, mako, mako_r, nipy_spectral, nipy_spectral_r, ocean, ocean_r, pink, pink_r, plasma, plasma_r, prism, prism_r, rainbow, rainbow_r, rocket, rocket_r, seismic, seismic_r, spectral, spectral_r, spring, spring_r, summer, summer_r, tab10, tab10_r, tab20, tab20_r, tab20b, tab20b_r, tab20c, tab20c_r, terrain, terrain_r, viridis, viridis_r, vlag, vlag_r, winter, winter_r


sns.palplot(sns.color_palette("Blues"))
sns.palplot(sns.color_palette("Accent"))

如果想要翻转渐变,可以在面板名称中添加一个_r后缀

cubehelix_palette()调色板

色调线性变换

sns.palplot(sns.color_palette("cubehelix", 8))

sns.palplot(sns.cubehelix_palette(8, start=.5, rot=-.75))

sns.palplot(sns.cubehelix_palette(8, start=.75, rot=-.150))

light_palette() 和dark_palette()调用定制连续调色板

sns.palplot(sns.light_palette("green"))

sns.palplot(sns.dark_palette("purple"))

sns.palplot(sns.light_palette("navy", reverse=True))

sns.palplot(sns.light_palette((210, 90, 60), input="husl"))

x = np.random.normal(size=100)
sns.distplot(x,kde=False) #kde 核密度估计

sns.distplot(x, bins=20, kde=False)

from scipy import stats, integrate
x = np.random.gamma(6, size=200)
sns.distplot(x, kde=False, fit=stats.gamma)

mean, cov = [0, 1], [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])
df

散点图

sns.jointplot(x="x", y="y", data=df)

sns.jointplot(x="x", y="y", data=df,kind = "reg")

x, y = np.random.multivariate_normal(mean, cov, 1000).T
with sns.axes_style("white"):
sns.jointplot(x=x, y=y, kind="hex", color="k")

iris = sns.load_dataset("iris")
sns.pairplot(iris)

tips = sns.load_dataset("tips")

tips.head()

regplot()和lmplot()都可以绘制回归关系,推荐regplot()

sns.lmplot(x="total_bill", y="tip", data=tips);

anscombe = sns.load_dataset("anscombe")
print(anscombe.head())
print(type(anscombe))
sns.regplot(x="x", y="y", data=anscombe.query("dataset == 'I'"),
ci=None, scatter_kws={"s": 80}) # ci 置信区间 query 查询布尔表达式所在的数据

sns.lmplot(x="x", y="y", data=anscombe.query("dataset == 'II'"),
ci=None, scatter_kws={"s": 80})

sns.lmplot(x="x", y="y", data=anscombe.query("dataset == 'II'"),
order=2, ci=None, scatter_kws={"s": 80}) #order : int, optional
#If order is greater than 1, use numpy.polyfit to estimate a polynomial regression.

sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips);

sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips,                 
markers=["o","*"], palette="Set1");
#hue 列名分割
#matplotlib.markers

sns.lmplot(x="total_bill", y="tip", hue="smoker", col="time", data=tips);

sns.lmplot(x="total_bill", y="tip", hue="smoker",
col="time", row="sex", data=tips);

f, ax = plt.subplots(figsize=(5, 5))
sns.regplot(x="total_bill", y="tip", data=tips, ax=ax);

sns.lmplot(x="total_bill", y="tip", col="day", data=tips,
col_wrap=2, size=4) #size 图size

sns.lmplot(x="total_bill", y="tip", col="day", data=tips,)

sns.lmplot(x="total_bill", y="tip", col="day", data=tips,aspect=.5)  # aspect 长宽比

多变量

sns.set(style="whitegrid", color_codes=True)
sns.stripplot(x="day", y="total_bill", data=tips)

重叠是很常见的现象,但是重叠影响我观察数据的量了

sns.stripplot(x="day", y="total_bill", data=tips, jitter=True) #抖动量

sns.swarmplot(x="day", y="total_bill", data=tips)

sns.swarmplot(x="day", y="total_bill", hue="sex",data=tips)

sns.swarmplot(x="total_bill", y="day", hue="time", data=tips)

盒图

  • IQR即统计学概念四分位距,第1/4分位与第3/4分位之间的距离
  • $N = 1.5IQR$ 如果一个值$>Q3+N$或$<Q1-N$,则为离群点
sns.boxplot(x="day", y="total_bill", hue="time", data=tips)

sns.violinplot(x="total_bill", y="day", hue="time", data=tips)

sns.violinplot(x="day", y="total_bill", hue="sex", data=tips, split=True)

sns.violinplot(x="day", y="total_bill", hue="sex", data=tips)

sns.violinplot(x="day", y="total_bill", data=tips, inner=None)  #inner 小提琴内部图形
sns.swarmplot(x="day", y="total_bill", data=tips, color="w", alpha=.5) # alpha 透明度

sns.violinplot(x="day", y="total_bill", data=tips, inner=None)
sns.swarmplot(x="day", y="total_bill", data=tips, color="w",)

条形图

显示值的集中趋势可以用条形图

titanic = sns.load_dataset("titanic")
print(titanic.describe())
print(titanic.info())
sns.barplot(x="sex", y="survived", hue="class", data=titanic)

点图

点图可以更好的描述变化差异

sns.pointplot(x="sex", y="survived", hue="class", data=titanic);

sns.pointplot(x="class", y="survived", hue="sex", data=titanic,
palette={"male": "g", "female": "m"},
markers=["^", "o"], linestyles=["-", "--"]);

宽形数据

sns.boxplot(data=iris,orient="h") #orient  垂直和水平

多层面板分类图

sns.factorplot(x="day", y="total_bill", hue="smoker", data=tips)

sns.factorplot(x="day", y="total_bill", hue="smoker", data=tips, kind="bar")

sns.factorplot(x="day", y="total_bill", hue="smoker",
col="time", data=tips, kind="swarm")

sns.factorplot(x="time", y="total_bill", hue="smoker",
col="day", data=tips, kind="box", size=4, aspect=.5)


seaborn.factorplot(x=None, y=None, hue=None, data=None, row=None, col=None, col_wrap=None, estimator=, ci=95, n_boot=1000, units=None, order=None, hue_order=None, row_order=None, col_order=None, kind=’point’, size=4, aspect=1, orient=None, color=None, palette=None, legend=True, legend_out=True, sharex=True, sharey=True, margin_titles=False, facet_kws=None, **kwargs)

Parameters:

  • x,y,hue 数据集变量 变量名
  • date 数据集 数据集名
  • row,col 更多分类变量进行平铺显示 变量名
  • col_wrap 每行的最高平铺数 整数
  • estimator 在每个分类中进行矢量到标量的映射 矢量
  • ci 置信区间 浮点数或None
  • n_boot 计算置信区间时使用的引导迭代次数 整数
  • units 采样单元的标识符,用于执行多级引导和重复测量设计 数据变量或向量数据
  • order, hue_order 对应排序列表 字符串列表
  • row_order, col_order 对应排序列表 字符串列表
  • kind : 可选:point 默认, bar 柱形图, count 频次, box 箱体, violin 提琴, strip 散点,swarm 分散点 size 每个面的高度(英寸) 标量 aspect 纵横比 标量 orient 方向 “v”/“h” color 颜色 matplotlib颜色 palette 调色板 seaborn颜色色板或字典 legend hue的信息面板 True/False legend_out 是否扩展图形,并将信息框绘制在中心右边 True/False share{x,y} 共享轴线 True/False

g = sns.FacetGrid(tips, col="time")

sns.set(style="ticks")
g = sns.FacetGrid(tips, col="time") #把数据中很多子集画出来

g = sns.FacetGrid(tips, col="time")                    #占位
print(help(g.map))
g.map(plt.hist, "tip") #画图

g = sns.FacetGrid(tips, col="sex", hue="smoker")
g.map(plt.scatter, "total_bill", "tip", alpha=.7)
g.add_legend()

g = sns.FacetGrid(tips, row="smoker", col="time", margin_titles=True)  #变量标题右侧,实验性并不总是有效
g.map(sns.regplot, "size", "total_bill", color=".1", fit_reg=False, x_jitter=.1) #color 颜色深浅 fit_reg 回归的线 x_jitter 浮动

g = sns.FacetGrid(tips, col="day", size=4, aspect=.5)
g.map(sns.barplot, "sex", "total_bill",order=["Male","Female"])

from pandas import Categorical
ordered_days = tips.day.value_counts().index
print (ordered_days)
ordered_days = Categorical(['Thur', 'Fri', 'Sat', 'Sun'])
g = sns.FacetGrid(tips, row="day", row_order=ordered_days,
size=1.7, aspect=4)
g.map(sns.boxplot, "total_bill",order=["Male","Female"])

pal = dict(Lunch="seagreen", Dinner="gray")
g = sns.FacetGrid(tips, hue="time", palette=pal, size=5)
g.map(plt.scatter, "total_bill", "tip", s=50, alpha=.7, linewidth=.5, edgecolors="red") #edgecolors 元素边界颜色
g.add_legend()

g = sns.FacetGrid(tips, hue="sex", palette="Set1", size=5, hue_kws={"marker": ["^", "v"]})
g.map(plt.scatter, "total_bill", "tip", s=100, linewidth=.5, edgecolor="white")
g.add_legend()

with sns.axes_style("white"):
g = sns.FacetGrid(tips, row="sex", col="smoker", margin_titles=True, size=2.5)
g.map(plt.scatter, "total_bill", "tip", color="#334488", edgecolor="white", lw=.5);
g.set_axis_labels("Total bill (US Dollars)", "Tip");
g.set(xticks=[10, 30, 50], yticks=[2, 6, 10]);
g.fig.subplots_adjust(wspace=.02, hspace=.02); #子图与子图
#g.fig.subplots_adjust(left = 0.125,right = 0.5,bottom = 0.1,top = 0.9, wspace=.02, hspace=.02)

iris = sns.load_dataset("iris")
g = sns.PairGrid(iris)
g.map(plt.scatter)

g = sns.PairGrid(iris)
g.map_diag(plt.hist) #对角线
g.map_offdiag(plt.scatter) #非对角线

g = sns.PairGrid(iris, hue="species")
g.map_diag(plt.hist)
g.map_offdiag(plt.scatter)
g.add_legend();

g = sns.PairGrid(iris, vars=["sepal_length", "sepal_width"], hue="species")  #vars 取一部分
# print(iris.species.value_counts().index)
# print(iris.describe())
# print(iris.info())
# print(iris.head(10))
g.map(plt.scatter)

g = sns.PairGrid(tips, hue="size", palette="GnBu_d")
g.map(plt.scatter, s=50, edgecolor="white")
g.add_legend();

热力图

uniform_data = np.random.rand(3, 3)
print (uniform_data)
heatmap = sns.heatmap(uniform_data)

ax = sns.heatmap(uniform_data, vmin=0.2, vmax=0.5)  #最大最小取值

normal_data = np.random.randn(3, 3)
print (normal_data)
ax = sns.heatmap(normal_data, center=0) #中心值

flights = sns.load_dataset("flights")
flights.head()

flights = flights.pivot("month", "year", "passengers") #根据列值重塑数据
print (flights)
ax = sns.heatmap(flights)

ax = sns.heatmap(flights, annot=True,fmt="d")

ax = sns.heatmap(flights, linewidths=.5)

ax = sns.heatmap(flights, cmap="YlGnBu")

ax = sns.heatmap(flights, cbar=False) #隐藏bar

------ 本文结束 🎉🎉 谢谢观看 ------
0%