#import libraries
import pandas as pd
import numpy as np


data=pd.DataFrame({"a":[1,5]*3,"b":[1,1,2,3,2,3]})
data


#To see whether the row is repeated or not, you can use the duplicated method.
data.duplicated()

0    False
1    False
2    False
3    False
4     True
5     True
dtype: bool


#To remove duplicate rows, you can use the drop_duplicates method which returns a data frame.
data.drop_duplicates()


import pandas as pd
data={'Name':['A','B','C','D'],'Age':[42,54,20,63]}
df=pd.DataFrame(data,index=['value1','value2','value3','value4'])
df


score={'A':24,'B':30,'C':36,'D':42}
# Converting the first character of the name to uppercase so that the names are same.
n=df["Name"].str.capitalize()
n

value1    A
value2    B
value3    C
value4    D
Name: Name, dtype: object


#Now, adding the score variable to df dataset using map() method.
df["Score"]=n.map(score)
df


s=pd.Series([14,55,87,966,78,44])
s

0     14
1     55
2     87
3    966
4     78
5     44
dtype: int64


#replacing the value.
s.replace(87,np.nan)

0     14.0
1     55.0
2      NaN
3    966.0
4     78.0
5     44.0
dtype: float64


#replacing multiple values.
s.replace([78,14],[np.nan,566])

0    566.0
1     55.0
2     87.0
3    966.0
4      NaN
5     44.0
dtype: float64


import pandas as pd
import numpy as np

df=pd.DataFrame(np.arange(12).reshape(3,4),index=[0,1,2],columns=["a","b","c","d"])
df


#Create a variable and pass it into the dataset
s=pd.Series(["one","two","three"])
df.index=df.index.map(s)
df


#capitalize the row and column names with the rename method
df.rename(index=str.title,columns=str.upper)


df.rename(index={"one":"10"},columns={"d":"f"},inplace=True)
df


data=pd.DataFrame(np.random.randn(1000,4))
data


data.head()


#using the describe method to see summary statistics.
data.describe()


#assigning the column with the 1st index of the data to the col variable.
col=data[1]


col[np.abs(col)>3]

446   -3.126541
938   -3.488546
Name: 1, dtype: float64


#To find rows with at least one absolute value exceeding 3 in the entire data set

data[(np.abs(data)>3).any(1)]

	A	B	C	D
One	0	1	2	3
Two	4	5	6	7
Three	8	9	10	11

	0	1	2	3
0	1.095845	-0.579053	0.021126	-0.615226
1	1.265209	1.606119	-0.167518	0.022493
2	0.260408	0.015183	0.986117	0.203856
3	-0.805561	0.224160	0.992170	-1.046847
4	-0.257810	0.598237	1.087677	1.733996
...	...	...	...	...
995	0.172482	0.239283	-0.460610	0.383162
996	-0.012413	1.782691	0.706961	-1.101175
997	-0.420072	-0.071259	-1.346796	0.795517
998	-0.151630	0.788102	1.759462	0.306519
999	-2.626494	1.541084	0.057281	0.153461

	0	1	2	3
0	1.095845	-0.579053	0.021126	-0.615226
1	1.265209	1.606119	-0.167518	0.022493
2	0.260408	0.015183	0.986117	0.203856
3	-0.805561	0.224160	0.992170	-1.046847
4	-0.257810	0.598237	1.087677	1.733996

	0	1	2	3
count	1000.000000	1000.000000	1000.000000	1000.000000
mean	-0.032380	-0.003029	0.074722	0.029196
std	0.991572	0.972940	0.937606	0.956663
min	-3.541948	-3.488546	-2.799998	-3.595273
25%	-0.663346	-0.640519	-0.556768	-0.658451
50%	-0.026216	-0.038468	0.085684	0.041386
75%	0.583050	0.647501	0.676943	0.691507
max	3.232359	2.951305	3.066394	3.040598

	0	1	2	3
331	-3.541948	-1.260699	0.602065	-0.489064
396	-3.121986	0.180793	0.289388	-0.949250
446	1.407799	-3.126541	0.692714	0.296811
619	-0.239821	-1.725038	-0.766128	-3.595273
676	-0.037124	-0.861719	3.066394	1.267702
713	3.045511	-0.058880	-0.191928	0.909453
912	3.232359	0.292739	-0.357451	0.677699
938	-0.731734	-3.488546	0.187923	-1.593345
973	0.495800	-1.063279	-1.364449	3.040598

Data Transformation in Pandas¶

Finding Duplicate Values¶

Mapping¶

Replacing¶

Renaming¶

Finding Specific value in a dataset¶

	a	b	c	d
one	0	1	2	3
two	4	5	6	7
three	8	9	10	11

	a	b	c	f
10	0	1	2	3
two	4	5	6	7
three	8	9	10	11