March 4, 2019

Python seaborn package

Visualization with seaborn package in Python


Seaborn is a Python library built on top of matplotlib.

import seaborn as sns

sns.set()
sns.set(style="darkgrid")
>>> sns.set_style("whitegrid")
sns.set(font_scale=1.25)
sns.set_style({'font.family': 'Helvetica'})
seaborn.set(style="whitegrid", color_codes=True)
sns.set(style='white', context='notebook', palette='deep')
sns.set(rc={"font.style":"normal",
            "axes.facecolor":(0.25, 0.25, 0.25),
            "figure.facecolor":(0.25, 0.25, 0.25),
            "text.color":"black",
            "xtick.color":"black",
            "ytick.color":"black",
            "axes.labelcolor":"black",
            "axes.grid":False,
            'axes.labelsize':30,
            'figure.figsize':(20.0, 10.0),
            'xtick.labelsize':25,
            'ytick.labelsize':20})

titanic = sns.load_dataset("titanic")
color = sns.color_palette()
plt_.set_xticklabels(plt_.get_xticklabels(), rotation=90)
leg = p.get_legend()
leg.set_title("Duration")
labs = leg.texts
labs[0].set_text("Short")
labs[1].set_text("Long")
leg.get_title().set_color('white')

Creating Line plot using seaborn package in Python
sns.lineplot(x, y)

Drawing Scatter plot using seaborn package in Python
ax = sns.scatterplot(x="stand_by_time", y="battery_capacity", data=data)
ax = sns.scatterplot(x="stand_by_time", y="battery_capacity", hue="thickness", data=data)

Plotting Box plot using seaborn package in Python
sns.boxplot(df['Sales'])
sns.boxplot(y = df['Order_Quantity'])
sns.boxplot(x = 'Product_Category', y='Sales', data=df)
sns.boxplot(x = 'Product_Category', y='Profit', hue = "Customer_Segment", data=df)
seaborn.boxplot(target, ax=ax[1]);
sns.boxplot(x=train_plot['Neighborhood'], y=train_plot['LotFrontage'], width=0.7, linewidth=0.8);
sns.boxplot(x="is_promoted", y="length_of_service", data=train, ax=axesbi[1,0])

The countplot function in the seaborn package is used to create count plots
g = sns.countplot(Y_train)
sns.countplot(x='education', hue='party', data=df, palette='RdBu')
ax = sns.countplot(x="class", data=titanic)
>>> ax = sns.countplot(x="class", hue="who", data=titanic)
seaborn.countplot(y="Product_Sub_Category", data=df)
ax = sns.countplot(x="who", data=titanic, facecolor=(0, 0, 0, 0), linewidth=5, edgecolor=sns.color_palette("dark", 3))
>>> ax = sns.countplot(x="who", data=titanic, palette="Set3")
p = sns.countplot(data=df, y = 'Category', hue = 'islong',  saturation=1)
p = sns.countplot(data=df, y = 'Category', hue = 'islong', saturation=1, xerr=7*np.arange(num_categories), edgecolor=(0,0,0), linewidth=2, fill=False)

Cat Plots in seaborn
sns.catplot(x="class", hue="who", col="survived", data=titanic, kind="count",  height=4, aspect=.7);
sns.catplot(y="Gender", x="Math_Score", hue="Parental_Level_of_Education", data=data, kind="bar")

Python seaborn Bar plots
>>> ax = sns.barplot(x="day", y="total_bill", data=tips)
>>> ax = sns.barplot(x="day", y="total_bill", hue="sex", data=tips)
seaborn.barplot(x='Product_Category', y='Sales', data=df, estimator=np.median)
ax = sns.barplot("day", "total_bill", data=tips, linewidth=2.5, facecolor=(1, 1, 1, 0), errcolor=".2", edgecolor=".2")
plt_ = sns.barplot(list(count.keys()), list(count.values()))
ax = sns.barplot(x=data['Gender'].value_counts().index, y=data['Gender'].value_counts().values, palette="Blues_d", hue=['female','male'])
sns.barplot(x=data['Gender'].value_counts().values, y=data['Gender'].value_counts().index, alpha=0.5, color='red', label='Gender')

Drawing Heat maps, in Python, using seaborn heatmap function
seaborn.heatmap(df.corr())
sns.heatmap(telecom.corr(), annot = True)
sns.heatmap(year_month, cmap="YlGnBu")
sns.heatmap(cor, cmap="YlGnBu", annot=True)
sns.heatmap(corrmat, vmax=.8, square=True);
hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10}, yticklabels=cols.values, xticklabels=cols.values)
sns.heatmap(weight_matrices[epoch], annot=True, cmap=my_cmap, vmin=-3, vmax=3)
sns.heatmap(data = train.isnull(), yticklabels=False, cbar=False, cmap='viridis')
sns.heatmap(pd.DataFrame(metrics.confusion_matrix(y_test, y_pred)), annot=True, cmap="YlGnBu", fmt='g')
imgplot = sns.heatmap(heatmap, xticklabels=False, yticklabels=False, vmax=prob_no)
sns.heatmap(npimg, xticklabels=False, yticklabels=False, cmap='Greys', ax=ax, cbar=False)

cmap = sns.diverging_palette(220, 10, as_cmap=True)
sns.heatmap(corr, cmap=cmap,linewidths=1, vmin=-1, vmax=1, square=True, cbar=True, center=0, ax=ax, mask=mask)

Python Seaborn Factor plots
>>> g = sns.factorplot(x="time", y="pulse", hue="kind", data=exercise)
g = sns.factorplot("alive", col="deck", col_wrap=4, data=titanic[titanic.deck.notnull()], kind="count", size=2.5, aspect=.8)

Seaborn Swarm Plots
_ = sns.swarmplot(x='state', y='share', data=df)
swarmplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, split=False, orient=None, color=None, palette=None, size=5, edgecolor='gray', linewidth=0, ax=None, **kwargs)

Point Plots in Seaborn Python package
ax = sns.pointplot(x="Reading_Score", y="Math_Score", hue="Gender", data=data)
seaborn.pointplot(x="Pclass", y="Survived", hue="Sex", data=data_train, palette={"male": "blue", "female": "pink"}, markers=["*", "o"], linestyles=["-", "--"]);
sns.pointplot(x='date_block_num', y='item_cnt_day', hue='shop_id', data=grouped[np.logical_and(count*id_per_graph <= grouped['shop_id'], grouped['shop_id'] < (count+1)*id_per_graph)], ax=axes[i][j])
sns.pointplot(x=np.arange(1,191), y=data[(data['Race/Ethnicity']=='group B')].Math_Score, color='lime', alpha=0.8)

Seaborn Joint Plots
sns.jointplot('Sales', 'Profit', df)
sns.jointplot(df['Sales'], df['Profit'])
sns.jointplot('Sales', 'Profit', df, kind="hex", color="k")
sns.jointplot(x='fico', y='int.rate', data=loans, color='purple')
sns.jointplot(x=np.arange(1,191), y=data[(data['Race/Ethnicity']=='group B')].Reading_Score,color='k').plot_joint(sns.kdeplot, zorder=0, n_levels=6)

Pair Plots in Seaborn package
sns.pairplot(tips)
sns.pairplot(data, x_vars=['TV', 'Radio', 'Newspaper'], y_vars='Sales', size=7, aspect=0.7)
ax = sns.pairplot(df, diag_kind="hist")
ax = sns.pairplot(df, diag_kind='hist', hue='liked')
seaborn.pairplot(df_train[cols], size = 2.5)
sns.pairplot(x_vars='length_of_service', y_vars='avg_training_score', hue='is_promoted', data=train, aspect=0.8, size=6)

Seaborn Violin Plots/Strip Plots
sns.violinplot(x='is_promoted', y='age', data=train)
sns.violinplot(x='day', y='tip', data=tips, inner=None, color='light gray')

sns.violinplot(y='log.annual.inc', hue='not.fully.paid', data=loan_data)

sns.stripplot(x='day', y='tip', data=tips, size=4, jitter=True)

Seaborn package Distribution Plots
sns.distplot(df['Shipping_Cost'])
sns.distplot(df['Sales'], hist=False)
seaborn.distplot(df['Sales'], bins = 50)
ax = sns.distplot(data["stand_by_time"], kde=False, rug=True, bins = 20)
sns.distplot(df_train['SalePrice'], fit=norm);
sns.distplot(df['Shipping_Cost'][:200], rug=True)       # rug plot
rugplot(a, height=0.05, axis='x', ax=None, **kwargs)
sns.distplot(target, ax=ax[0])
sns.distplot(train['length_of_service'].dropna(), kde=True, ax=axes[1,0])

g = sns.FacetGrid(train_df, col='Survived')
g = sns.FacetGrid(train_df, col='Survived', row='Pclass', size=2.2, aspect=1.6)

g.map(plt.hist, 'Age', bins=20)


sns.regplot(train.LotArea.apply(np.sqrt), target, ax=ax[0])

sns.tsplot(data=time_df)

Seaborn KDE plots
KDE stands for kernel density estimate, a smoothed alternative to a histogram
sns.kdeplot(data['Math_Score'])
sns.kdeplot(df['x1'], ax=ax1)
sns.kdeplot(data['Writing_Score'], shade=True, color='b')
sns.kdeplot(scaled_df['x1'], ax=ax2)
sns.kdeplot(x, y, cmap=cmap, shade=True, cut=5, ax=ax)
sns.kdeplot(data['Writing_Score'], data['Reading_Score'], cmap='Blues', shade=True, shade_lowest=False)

LM Plots in Seaborn package
sns.lmplot(x='Math_Score', y='Writing_Score', hue='Gender', data=data)
sns.lmplot(x='fico', y='int.rate', data=loans, hue='credit.policy', col='not.fully.paid', markers=['^','.'])
sns.lmplot(x='Math_Score', y='Writing_Score', hue='Gender', data=data, markers=['x','o'])

Related Python Articles: matplotlib package in Python | How to connect with Databases in Python?


3 comments: