小提琴图

🔖 python
🔖 visualization
Author

Guangyao Zhao

Published

Feb 9, 2023

Salinity intrusion 项目的特征分布可视化。

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# toc: global settings
# Matplotlib rc overrides shared by every figure drawn below.
config = {
    "font.size": 16,
}
for rc_key in config:
    plt.rcParams[rc_key] = config[rc_key]

# toc: read the file and prepare the data
# Features to plot, listed in the order in which they should appear.
feature_ranking = [
    "Water temperature",
    "Water level 1",
    "Tide",
    "Temperature",
    "Sea level pressure",
    "Wind speed",
    "Atmospheric pressure",
    "Water level 2",
    "Precipitation",
]

# The CSV file is expected to sit in the same folder as this script.
dir_path = os.path.dirname(__file__)
file_path = os.path.join(dir_path, "Dataset_24.csv")

# Row 0 is the header and column 0 the index. The last two columns are
# discarded before the wanted features are selected.
dataset = pd.read_csv(file_path, header=[0], index_col=[0])
dataset = dataset.iloc[:, :-2]
dataset = dataset.loc[:, feature_ranking]  # keep the ranked features, in order


# toc: figure
# Draw one horizontal violin per feature on a grid that is `num_wid` subplots
# wide, filling the grid row by row in the order given by `feature_ranking`.
subplots_num = len(feature_ranking)  # number of subplots (one per feature)
num_wid = 3  # subplots per row
# Round up so that a feature count that is not a multiple of `num_wid` still
# gets a (partially filled) last row instead of silently losing features.
num_hei = int(np.ceil(subplots_num / num_wid))  # number of rows
fig = plt.figure(figsize=(8 * num_wid, 6 * num_hei))
# squeeze=False keeps `axes` two-dimensional even when the grid has one row.
axes = fig.subplots(nrows=num_hei, ncols=num_wid, squeeze=False)
flat_axes = axes.ravel()  # row-major: left to right, then top to bottom

for ax, feature in zip(flat_axes, feature_ranking):
    tmp_data = dataset[feature]  # values of the current feature
    violin = ax.violinplot(
        dataset=tmp_data,
        vert=False,
        showextrema=False,
    )
    # violin["bodies"] holds the filled area of each violin.
    for patch in violin["bodies"]:
        patch.set_facecolor((232 / 255, 125 / 255, 115 / 255))  # fill colour
        patch.set_edgecolor("black")  # outline colour
        patch.set_alpha(1)  # fully opaque

    # Box-plot style overlay drawn at y=1.0, the default position that
    # violinplot assigns to a single violin.
    quantile1, median, quantile3 = np.percentile(tmp_data, [25, 50, 75])
    iqr = quantile3 - quantile1
    # Whiskers follow the 1.5 * IQR rule but are clipped to the observed data
    # range so the line never extends past the violin itself.
    left_whisker = np.clip(quantile1 - 1.5 * iqr, tmp_data.min(), quantile1)
    right_whisker = np.clip(quantile3 + 1.5 * iqr, quantile3, tmp_data.max())
    ax.scatter(median, 1.0, color="white", zorder=4)  # median marker
    ax.hlines(y=1.0, xmin=quantile1, xmax=quantile3, lw=9, zorder=3)  # IQR bar
    ax.hlines(y=1.0, xmin=left_whisker, xmax=right_whisker, lw=2, zorder=2)

    # Axes cosmetics: label the x axis with the feature name and strip the
    # y axis, which carries no information for a single violin.
    ax.set_xlabel(feature)
    ax.tick_params(axis="x", which="major", direction="inout")
    ax.tick_params(axis="y", which="both", left=False)
    ax.set_yticklabels([])  # hide the y tick labels

# Hide the unused cells of a partially filled last row.
for ax in flat_axes[subplots_num:]:
    ax.set_visible(False)

# toc: save
# First copy: <prefix>/Figure/Distribution/Violin.pdf, where <prefix> is the
# part of `dir_path` that precedes the first occurrence of "Dataset".
# NOTE(review): presumably the script lives inside a "Dataset" folder of the
# project, which would make <prefix> the project root; confirm the layout.
fig_path = os.path.join(
    dir_path.split("Dataset")[0], "Figure/Distribution", "Violin.pdf"
)
# savefig does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(fig_path), exist_ok=True)
fig.savefig(fig_path)

# Second copy: next to this script.
fig_path = os.path.join(dir_path, "Violin.pdf")
fig.savefig(fig_path)