Data Visualization in Pandas¶
In [1]:
#importing the libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
Plotting graphs with the plot() method¶
In [2]:
# Simulate a 1000-step random walk: accumulate standard-normal draws.
data = pd.Series(np.cumsum(np.random.randn(1000)))
data
Out[2]:
0 -0.584127 1 -1.377490 2 -0.882970 3 0.461625 4 0.822463 ... 995 -28.696661 996 -28.190823 997 -29.000591 998 -28.343885 999 -29.862400 Length: 1000, dtype: float64
1. Line Plot¶
In [3]:
# Draw the series as a line chart and render the figure.
data.plot.line()
plt.show()
Another Example¶
In [8]:
# Four random walks, one per column: draw a 100x4 block of standard-normal
# steps and take the cumulative sum of each column.
df1 = pd.DataFrame(
    np.random.randn(100, 4),
    columns=['a', 'b', 'c', 'd'],
).cumsum()
df1
Out[8]:
| | a | b | c | d |
---|---|---|---|---|
0 | -0.955439 | 0.221885 | -0.151688 | 0.727239 |
1 | -2.242434 | -0.084837 | 0.482656 | -0.276666 |
2 | -1.637941 | 0.278600 | -0.780838 | 0.148659 |
3 | -2.197805 | 0.620449 | 0.493361 | 0.993773 |
4 | -1.378125 | -0.694380 | -0.383419 | -0.634611 |
… | … | … | … | … |
95 | 3.821256 | -12.634502 | -8.880621 | -4.884173 |
96 | 3.650171 | -11.791885 | -9.615944 | -4.033960 |
97 | 4.060936 | -12.506512 | -8.989279 | -4.387667 |
98 | 4.774044 | -12.897997 | -10.557663 | -3.829007 |
99 | 4.483041 | -12.122258 | -10.096502 | -3.049107 |
100 rows × 4 columns
In [9]:
# One line per column of df1 on a shared set of axes.
df1.plot.line()
plt.show()
2. Bar Graph¶
In [12]:
# Seven rows of random values in three columns, used for the bar chart below.
df2 = pd.DataFrame(
    data=np.random.rand(7, 3),
    columns=['a', 'b', 'c'],
)
df2
Out[12]:
| | a | b | c |
---|---|---|---|
0 | 0.273284 | 0.169643 | 0.899382 |
1 | 0.564891 | 0.266047 | 0.257294 |
2 | 0.339101 | 0.343239 | 0.644331 |
3 | 0.841610 | 0.953456 | 0.942113 |
4 | 0.064272 | 0.341218 | 0.710116 |
5 | 0.517114 | 0.386171 | 0.055171 |
6 | 0.860341 | 0.688838 | 0.697439 |
In [13]:
# Grouped vertical bars: one group per row of df2, one bar per column.
df2.plot(kind="bar")
plt.show()
3. Histogram¶
In [29]:
# Load the Iris measurements. The file is read without a header row
# (header=None), so the column labels are supplied at parse time through
# `names` rather than being assigned to `df.columns` afterwards.
df = pd.read_csv(
    "iris.data",
    header=None,
    names=["sepal_length", "sepal_width", "petal_length", "petal_width", "species"],
)
In [30]:
# Preview the first five rows to confirm the column labels were applied.
df.head(5)
Out[30]:
| | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
In [31]:
# Overlaid histograms; alpha=0.5 makes the bars semi-transparent so that
# overlapping distributions stay visible.
df.plot(kind="hist", alpha=0.5)
plt.show()
4. Boxplot Charts¶
In [32]:
# Box-and-whisker plot with default styling.
df.plot(kind="box")
plt.show()
In [33]:
# Same box plot, with a custom colour for each element of the boxes.
colors = {
    'boxes': 'Red',
    'whiskers': 'blue',
    'medians': 'Black',
    'caps': 'Green',
}
df.plot(kind="box", color=colors)
plt.show()
Plotting Horizontally¶
In [34]:
# vert=False lays the boxes out horizontally instead of vertically.
df.plot(kind="box", vert=False)
plt.show()
5. Area Charts¶
In [35]:
# Rebind `df` to a 10x4 frame of random values for the area-chart examples.
df = pd.DataFrame(
    data=np.random.rand(10, 4),
    columns=['a', 'b', 'c', 'd'],
)
df.head()
Out[35]:
| | a | b | c | d |
---|---|---|---|---|
0 | 0.937786 | 0.418841 | 0.799605 | 0.119481 |
1 | 0.233060 | 0.488327 | 0.558990 | 0.271129 |
2 | 0.747060 | 0.356159 | 0.446656 | 0.736579 |
3 | 0.318732 | 0.837289 | 0.092116 | 0.409191 |
4 | 0.959139 | 0.061842 | 0.452362 | 0.846347 |
In [36]:
# Area chart of every column of df.
df.plot(kind="area")
plt.show()
In [37]:
# Area chart of a single column: select it first, then plot the resulting Series.
df['c'].plot(kind="area")
plt.show()
6. Scatter Plots¶
In [26]:
# Scatter plot of column 'b' against column 'a'.
df.plot(kind="scatter", x='a', y='b')
plt.show()
In [38]:
# Reload the Iris data (`df` was rebound to random values earlier in the
# notebook). The file is read without a header row, so the column labels are
# supplied at parse time through `names`.
df = pd.read_csv(
    "iris.data",
    header=None,
    names=["sepal_length", "sepal_width", "petal_length", "petal_width", "species"],
)
# Sepal length vs. sepal width; each marker's size is petal_length * 50.
df.plot.scatter(x='sepal_length', y='sepal_width', s=df['petal_length'] * 50)
plt.show()
7. Pie Charts¶
In [40]:
# Mean petal width for each species; these values become the pie slices.
df_avg = df.groupby("species")["petal_width"].mean()
df_avg
Out[40]:
species Iris-setosa 0.244 Iris-versicolor 1.326 Iris-virginica 2.026 Name: petal_width, dtype: float64
In [41]:
# Pie chart of the per-species mean petal width. Ending the cell with
# plt.show(), like the other plotting cells in this notebook, renders the
# figure without echoing the Axes object's repr as the cell output.
df_avg.plot.pie()
plt.show()
Out[41]:
<AxesSubplot:ylabel='petal_width'>
8. Density Chart¶
In [42]:
# Kernel density estimate plot of df (`density` is the alias of `kde`).
df.plot.density()
plt.show()