小提琴图
🔖 python
🔖 visualization
Salinity intrusion 项目的特征分布可视化。
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# toc: global settings
# Matplotlib rcParams overrides, applied once before any figure is created.
config = {}
config["font.size"] = 16  # base font size
plt.rcParams.update(config)
# toc: read the file and prepare the data
dir_path = os.path.dirname(__file__)  # directory that contains this script
file_path = os.path.join(dir_path, "Dataset_24.csv")

# Desired feature order; the columns are selected in exactly this order.
feature_ranking = [
    "Water temperature",
    "Water level 1",
    "Tide",
    "Temperature",
    "Sea level pressure",
    "Wind speed",
    "Atmospheric pressure",
    "Water level 2",
    "Precipitation",
]

# Row 0 is used as the header and column 0 as the index.
raw_table = pd.read_csv(file_path, header=[0], index_col=[0])
# Discard the last two columns of the file.
trimmed_table = raw_table.iloc[:, :-2]
# Keep only the ranked features, in ranking order.
dataset = trimmed_table[feature_ranking]
# toc: figure
# Draw one horizontal violin per feature on a grid that is filled row by row.
# Each violin is overlaid with a white median dot, a thick bar spanning the
# inter-quartile range and a thin bar spanning the 1.5 * IQR fences.
subplots_num = len(feature_ranking)  # number of subplots (one per feature)
num_wid = 3  # number of subplots per row
# Ceiling division: a partially filled last row is still created, so no
# feature is dropped when the count is not a multiple of num_wid.
num_hei = -(-subplots_num // num_wid)  # number of rows
fig = plt.figure(figsize=(8 * num_wid, 6 * num_hei))
# squeeze=False keeps `axes` two-dimensional even when there is only one row.
axes = fig.subplots(nrows=num_hei, ncols=num_wid, squeeze=False)

for ax, feature in zip(axes.ravel(), feature_ranking):
    # Missing values are dropped so they cannot turn the percentiles into NaN.
    tmp_data = dataset[feature].dropna().to_numpy()

    # NOTE(review): newer Matplotlib releases prefer `orientation="horizontal"`
    # over `vert=False`; kept as-is for compatibility with the version in use.
    violin = ax.violinplot(
        dataset=tmp_data,
        vert=False,
        showextrema=False,
    )
    # violin["bodies"] is the list of PolyCollection instances holding the
    # filled area of each violin, see
    # http://matplotlib.org/3.6.3/api/_as_gen/matplotlib.axes.Axes.violin.html#matplotlib.axes.Axes.violin
    for patch in violin["bodies"]:
        patch.set_facecolor((232 / 255, 125 / 255, 115 / 255))  # fill colour
        patch.set_edgecolor("black")  # outline colour
        patch.set_alpha(1)  # fully opaque

    # Quartiles and the 1.5 * IQR fences (these are not clipped to the data).
    quantile1, median, quantile3 = np.percentile(tmp_data, [25, 50, 75])
    iqr = quantile3 - quantile1
    lower_fence = quantile1 - 1.5 * iqr
    upper_fence = quantile3 + 1.5 * iqr

    # A single violin is placed at position 1 by default, hence y=1.0.
    ax.scatter(median, 1.0, color="white", zorder=4)  # median
    ax.hlines(y=1.0, xmin=quantile1, xmax=quantile3, lw=9, zorder=3)  # IQR
    ax.hlines(y=1.0, xmin=lower_fence, xmax=upper_fence, lw=2, zorder=2)

    # Axes cosmetics
    ax.set_xlabel(feature)
    ax.tick_params(axis="x", which="major", direction="inout")
    ax.tick_params(axis="y", which="both", left=False)
    ax.set_yticklabels([])  # hide the y tick labels

# Blank out the grid cells that have no feature assigned to them.
for unused_ax in axes.ravel()[subplots_num:]:
    unused_ax.set_axis_off()
# toc: save
# The figure is written twice: once into the project's figure folder and once
# next to this script.

# Project root = the part of dir_path that precedes the first "Dataset".
project_root = dir_path.split("Dataset")[0]
fig_path = os.path.join(project_root, "Figure/Distribution", "Violin.pdf")
# savefig does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(fig_path), exist_ok=True)
fig.savefig(fig_path)

# Second copy alongside the script (and the dataset it reads).
fig_path = os.path.join(dir_path, "Violin.pdf")
fig.savefig(fig_path)