【pandas學習筆記】Series

Datawhale發表於2018-07-12
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

建立Series以及對Series的相關操作

# 自動新增索引
# np.nan:empty value
>>>s1 = pd.Series([1,2,3,4,np.nan,5,6,7]) 
0    1.0
1    2.0
2    3.0
3    4.0
4    NaN
5    5.0
6    6.0
7    7.0
dtype: float64

>>>s1.values
array([ 1.,  2.,  3.,  4., nan,  5.,  6.,  7.])

>>>s1.index
RangeIndex(start=0, stop=8, step=1)

# 主動新增索引
>>>s2 = pd.Series([21,23,42,21,23],index=['Jack','Lucy','Helen','Milky','Jasper'])
Jack      21
Lucy      23
Helen     42
Milky     21
Jasper    23
dtype: int64

>>>s2['Jack']
21

>>>s2.loc['Jack'] # 根據名字索引
21

>>>s2.iloc[0] # 根據位置索引
21

>>>print(s2.shape,s2.size)
(5,) 5

>>>s2.head(2) #選擇頭兩行,預設前五行
Jack    21
Lucy    23
dtype: int64

>>>s2.describe()
count     5.0
mean     26.0
std       9.0
min      21.0
25%      21.0
50%      23.0
75%      23.0
max      42.0
dtype: float64

>>>s2.sort_values() # 對values排序
Jack      21
Milky     21
Lucy      23
Jasper    23
Helen     42
dtype: int64

>>>s2[s2>22] # Select the people who are older than 22
Lucy      23
Helen     42
Jasper    23
dtype: int64

>>>'Lucy' in s2
True

>>>s2_dict = s2.to_dict() # Series轉換為字典
{'Helen': 42, 'Jack': 21, 'Jasper': 23, 'Lucy': 23, 'Milky': 21}

>>>s2_series = pd.Series(s2_dict) # 字典轉換為Series
Helen     42
Jack      21
Jasper    23
Lucy      23
Milky     21
dtype: int64

# 指定的索引若不在dict中,會自動新增該索引,對應的值為NaN(dtype因此變為float64)
>>>name = ['Jack','Lucy','Helen','Milky','Tom','Jasper','Helen']
>>>s2_new = pd.Series(s2_dict,index = name)
Jack      21.0
Lucy      23.0
Helen     42.0
Milky     21.0
Tom        NaN
Jasper    23.0
Helen     42.0
dtype: float64

>>>s2_new.drop_duplicates() # drop duplicate values, keeping the first occurrence 去掉values中的重複值,保留第一個
Jack     21.0
Lucy     23.0
Helen    42.0
Tom       NaN
dtype: float64

>>>pd.isnull(s2_new) # 判斷是否為空值,等同於 s2_new.isnull()
Jack      False
Lucy      False
Helen     False
Milky     False
Tom        True
Jasper    False
Helen     False
dtype: bool

相關文章