Pandas基礎學習

專注的阿熊 發表於 2021-05-10

# Pandas basics walkthrough: build a small DataFrame and inspect it.
#
# The "Sample output" comment blocks below are the outputs recorded in the
# original post. Exact formatting may differ between pandas/NumPy versions
# and between print() and the interactive repr.
import numpy as np
import pandas as pd

# Build the demo frame from a dict of column specs. The scalar entries
# (A, B, F) are repeated for every row; the indexed Series (C), the array (D)
# and the Categorical (E) each supply four values, so the frame has four rows.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
})
print(df2)
# Sample output:
#      A          B    C  D      E    F
# 0  1.0 2013-01-02  1.0  3   test  foo
# 1  1.0 2013-01-02  1.0  3  train  foo
# 2  1.0 2013-01-02  1.0  3   test  foo
# 3  1.0 2013-01-02  1.0  3  train  foo

# The dtypes attribute shows the data type of each column.
print(df2.dtypes)
# Sample output:
# A           float64
# B    datetime64[ns]
# C           float32
# D             int32
# E          category
# F            object
# dtype: object

# Row labels (the index).
print(df2.index)
# Sample output:
# Int64Index([0, 1, 2, 3], dtype='int64')
# NOTE(review): newer pandas releases no longer have Int64Index and are
# expected to print Index([0, 1, 2, 3], dtype='int64') instead -- confirm
# against the installed version.

# Column names.
print(df2.columns)
# Sample output:
# Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

# Only the underlying values, as a NumPy array.
print(df2.values)
# Sample output (interactive repr form):
# array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
#        [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
#        [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
#        [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']], dtype=object)

# Summary statistics. In a script the result must be printed explicitly;
# a bare `df2.describe()` expression would compute it and throw it away.
print(df2.describe())
# Sample output:
#          A    C    D
# count  4.0  4.0  4.0
# mean   1.0  1.0  3.0
# std    0.0  0.0  0.0
# min    1.0  1.0  3.0
# 25%    1.0  1.0  3.0
# 50%    1.0  1.0  3.0
# 75%    1.0  1.0  3.0
# max    1.0  1.0  3.0
# NOTE(review): the set of columns summarised by default may differ between
# pandas versions (e.g. whether the datetime column B is included) -- confirm.

# Transpose: rows become columns and columns become rows.
print(df2.T)
# Sample output (wrapped by the console width):
#                      0                    1                    2  \
# A                    1                    1                    1
# B  2013-01-02 00:00:00  2013-01-02 00:00:00  2013-01-02 00:00:00
# C                    1                    1                    1
# D                    3                    3                    3
# E                 test                train                 test
# F                  foo                  foo                  foo
#
#                      3
# A                    1
# B  2013-01-02 00:00:00
# C                    1
# D                    3
# E                train
# F                  foo

# Sort by labels and print the result.
# axis=1 sorts along the column labels; ascending=False gives descending
# order, so the columns come out as F, E, D, C, B, A.
print(df2.sort_index(axis=1, ascending=False))
# Sample output:
#      F      E  D    C          B    A
# 0  foo   test  3  1.0 2013-01-02  1.0
# 1  foo  train  3  1.0 2013-01-02  1.0
# 2  foo   test  3  1.0 2013-01-02  1.0
# 3  foo  train  3  1.0 2013-01-02  1.0

# Sort rows by the values in column B and print the result. Every row has the
# same B value here, so the recorded output shows the rows in their original
# order.
print(df2.sort_values(by='B'))
# Sample output:
#      A          B    C  D      E    F
# 0  1.0 2013-01-02  1.0  3   test  foo
# 1  1.0 2013-01-02  1.0  3  train  foo
# 2  1.0 2013-01-02  1.0  3   test  foo
# 3  1.0 2013-01-02  1.0  3  train  foo


來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/69946337/viewspace-2771625/,如需轉載,請註明出處,否則將追究法律責任。