In [1]:
%matplotlib inline
In [2]:
import pandas as pd
In [3]:
import numpy as np
In [4]:
import matplotlib.pyplot as plt
In [5]:
s = pd.Series([1,3,5,np.nan,6,8])
In [6]:
s
Out[6]:
0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
In [7]:
dates = pd.date_range('20130101', periods=6)
In [8]:
dates
Out[8]:
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
In [9]:
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
In [10]:
df
Out[10]:
A B C D
2013-01-01 -1.234543 -0.181094 0.602767 0.024092
2013-01-02 -1.012344 -1.747663 -0.446165 -0.308272
2013-01-03 0.108684 -0.698606 0.294223 0.779937
2013-01-04 0.870050 -0.162659 -1.842476 -0.828424
2013-01-05 1.586294 -0.369570 0.887738 -0.067130
2013-01-06 0.603919 -0.292847 -0.351932 -0.546226
In [11]:
df2 = pd.DataFrame({'A' : 1., 
                    'B' : pd.Timestamp('20130102'),
                    'C' : pd.Series(1,index=list(range(4)),dtype='float32'),
                    'D' : np.array([3] * 4,dtype='int32'),
                    'E' : pd.Categorical(["test","train","test","train"]),
                    'F' : 'foo'}) 
In [12]:
df2
Out[12]:
A B C D E F
0 1.0 2013-01-02 1.0 3 test foo
1 1.0 2013-01-02 1.0 3 train foo
2 1.0 2013-01-02 1.0 3 test foo
3 1.0 2013-01-02 1.0 3 train foo
In [13]:
df2.dtypes
Out[13]:
A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object
In [14]:
df.head()
Out[14]:
A B C D
2013-01-01 -1.234543 -0.181094 0.602767 0.024092
2013-01-02 -1.012344 -1.747663 -0.446165 -0.308272
2013-01-03 0.108684 -0.698606 0.294223 0.779937
2013-01-04 0.870050 -0.162659 -1.842476 -0.828424
2013-01-05 1.586294 -0.369570 0.887738 -0.067130
In [15]:
df.tail(3)
Out[15]:
A B C D
2013-01-04 0.870050 -0.162659 -1.842476 -0.828424
2013-01-05 1.586294 -0.369570 0.887738 -0.067130
2013-01-06 0.603919 -0.292847 -0.351932 -0.546226
In [16]:
df.index
Out[16]:
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
In [17]:
df.columns
Out[17]:
Index([u'A', u'B', u'C', u'D'], dtype='object')
In [18]:
df.values
Out[18]:
array([[-1.23454325, -0.18109375,  0.60276697,  0.02409219],
       [-1.01234439, -1.74766336, -0.44616522, -0.30827222],
       [ 0.10868385, -0.69860582,  0.29422346,  0.77993654],
       [ 0.87005021, -0.16265896, -1.84247599, -0.82842427],
       [ 1.58629419, -0.36956975,  0.88773803, -0.06712956],
       [ 0.60391916, -0.29284674, -0.35193192, -0.54622645]])
In [19]:
df.describe()
Out[19]:
A B C D
count 6.000000 6.000000 6.000000 6.000000
mean 0.153677 -0.575406 -0.142641 -0.157671
std 1.100644 0.606204 0.983212 0.555681
min -1.234543 -1.747663 -1.842476 -0.828424
25% -0.732087 -0.616347 -0.422607 -0.486738
50% 0.356302 -0.331208 -0.028854 -0.187701
75% 0.803517 -0.209032 0.525631 0.001287
max 1.586294 -0.162659 0.887738 0.779937
In [20]:
df.T
Out[20]:
2013-01-01 00:00:00 2013-01-02 00:00:00 2013-01-03 00:00:00 2013-01-04 00:00:00 2013-01-05 00:00:00 2013-01-06 00:00:00
A -1.234543 -1.012344 0.108684 0.870050 1.586294 0.603919
B -0.181094 -1.747663 -0.698606 -0.162659 -0.369570 -0.292847
C 0.602767 -0.446165 0.294223 -1.842476 0.887738 -0.351932
D 0.024092 -0.308272 0.779937 -0.828424 -0.067130 -0.546226
In [21]:
df.sort_index(axis=1, ascending=False)
Out[21]:
D C B A
2013-01-01 0.024092 0.602767 -0.181094 -1.234543
2013-01-02 -0.308272 -0.446165 -1.747663 -1.012344
2013-01-03 0.779937 0.294223 -0.698606 0.108684
2013-01-04 -0.828424 -1.842476 -0.162659 0.870050
2013-01-05 -0.067130 0.887738 -0.369570 1.586294
2013-01-06 -0.546226 -0.351932 -0.292847 0.603919
In [22]:
df.sort_values(by='B')
Out[22]:
A B C D
2013-01-02 -1.012344 -1.747663 -0.446165 -0.308272
2013-01-03 0.108684 -0.698606 0.294223 0.779937
2013-01-05 1.586294 -0.369570 0.887738 -0.067130
2013-01-06 0.603919 -0.292847 -0.351932 -0.546226
2013-01-01 -1.234543 -0.181094 0.602767 0.024092
2013-01-04 0.870050 -0.162659 -1.842476 -0.828424
In [23]:
df['A']
Out[23]:
2013-01-01   -1.234543
2013-01-02   -1.012344
2013-01-03    0.108684
2013-01-04    0.870050
2013-01-05    1.586294
2013-01-06    0.603919
Freq: D, Name: A, dtype: float64
In [24]:
df[0:3]
Out[24]:
A B C D
2013-01-01 -1.234543 -0.181094 0.602767 0.024092
2013-01-02 -1.012344 -1.747663 -0.446165 -0.308272
2013-01-03 0.108684 -0.698606 0.294223 0.779937
In [25]:
df['20130102':'20130104']
Out[25]:
A B C D
2013-01-02 -1.012344 -1.747663 -0.446165 -0.308272
2013-01-03 0.108684 -0.698606 0.294223 0.779937
2013-01-04 0.870050 -0.162659 -1.842476 -0.828424
In [26]:
df.loc[dates[0]]
Out[26]:
A   -1.234543
B   -0.181094
C    0.602767
D    0.024092
Name: 2013-01-01 00:00:00, dtype: float64
In [27]:
df.loc[:,['A','B']]
Out[27]:
A B
2013-01-01 -1.234543 -0.181094
2013-01-02 -1.012344 -1.747663
2013-01-03 0.108684 -0.698606
2013-01-04 0.870050 -0.162659
2013-01-05 1.586294 -0.369570
2013-01-06 0.603919 -0.292847
In [28]:
df.loc['20130102':'20130104',['A','B']]
Out[28]:
A B
2013-01-02 -1.012344 -1.747663
2013-01-03 0.108684 -0.698606
2013-01-04 0.870050 -0.162659
In [29]:
df.loc['20130102',['A','B']]
Out[29]:
A   -1.012344
B   -1.747663
Name: 2013-01-02 00:00:00, dtype: float64
In [30]:
df.loc[dates[0],'A']
Out[30]:
-1.2345432480819052
In [31]:
df.at[dates[0],'A']
Out[31]:
-1.2345432480819052
In [32]:
df.iloc[3]
Out[32]:
A    0.870050
B   -0.162659
C   -1.842476
D   -0.828424
Name: 2013-01-04 00:00:00, dtype: float64
In [33]:
df.iloc[3:5,0:2]
Out[33]:
A B
2013-01-04 0.870050 -0.162659
2013-01-05 1.586294 -0.369570
In [34]:
df.iloc[[1,2,4],[0,2]]
Out[34]:
A C
2013-01-02 -1.012344 -0.446165
2013-01-03 0.108684 0.294223
2013-01-05 1.586294 0.887738
In [35]:
df.iloc[1:3,:]
Out[35]:
A B C D
2013-01-02 -1.012344 -1.747663 -0.446165 -0.308272
2013-01-03 0.108684 -0.698606 0.294223 0.779937
In [36]:
df.iloc[:,1:3]
Out[36]:
B C
2013-01-01 -0.181094 0.602767
2013-01-02 -1.747663 -0.446165
2013-01-03 -0.698606 0.294223
2013-01-04 -0.162659 -1.842476
2013-01-05 -0.369570 0.887738
2013-01-06 -0.292847 -0.351932
In [37]:
df.iloc[1,1]
Out[37]:
-1.7476633559261565
In [38]:
df.iat[1,1]
Out[38]:
-1.7476633559261565
In [39]:
df[df.A>0]
Out[39]:
A B C D
2013-01-03 0.108684 -0.698606 0.294223 0.779937
2013-01-04 0.870050 -0.162659 -1.842476 -0.828424
2013-01-05 1.586294 -0.369570 0.887738 -0.067130
2013-01-06 0.603919 -0.292847 -0.351932 -0.546226
In [40]:
df[df > 0]
Out[40]:
A B C D
2013-01-01 NaN NaN 0.602767 0.024092
2013-01-02 NaN NaN NaN NaN
2013-01-03 0.108684 NaN 0.294223 0.779937
2013-01-04 0.870050 NaN NaN NaN
2013-01-05 1.586294 NaN 0.887738 NaN
2013-01-06 0.603919 NaN NaN NaN
In [41]:
df2 = df.copy()
In [42]:
df2['E'] = ['one','one','two','three','four','three']
In [43]:
df2
Out[43]:
A B C D E
2013-01-01 -1.234543 -0.181094 0.602767 0.024092 one
2013-01-02 -1.012344 -1.747663 -0.446165 -0.308272 one
2013-01-03 0.108684 -0.698606 0.294223 0.779937 two
2013-01-04 0.870050 -0.162659 -1.842476 -0.828424 three
2013-01-05 1.586294 -0.369570 0.887738 -0.067130 four
2013-01-06 0.603919 -0.292847 -0.351932 -0.546226 three
In [44]:
df2[df2['E'].isin(['two','four'])]
Out[44]:
A B C D E
2013-01-03 0.108684 -0.698606 0.294223 0.779937 two
2013-01-05 1.586294 -0.369570 0.887738 -0.067130 four
In [45]:
s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))
In [46]:
s1
Out[46]:
2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64
In [47]:
df['F'] = s1
In [48]:
df.at[dates[0],'A'] = 0
In [49]:
df.iat[0,1] = 0
In [50]:
df.loc[:,'D'] = np.array([5] * len(df))
In [51]:
df
Out[51]:
A B C D F
2013-01-01 0.000000 0.000000 0.602767 5 NaN
2013-01-02 -1.012344 -1.747663 -0.446165 5 1.0
2013-01-03 0.108684 -0.698606 0.294223 5 2.0
2013-01-04 0.870050 -0.162659 -1.842476 5 3.0
2013-01-05 1.586294 -0.369570 0.887738 5 4.0
2013-01-06 0.603919 -0.292847 -0.351932 5 5.0
In [52]:
df2 = df.copy()
In [53]:
df2[df2 > 0] = -df2
In [54]:
df2
Out[54]:
A B C D F
2013-01-01 0.000000 0.000000 -0.602767 -5 NaN
2013-01-02 -1.012344 -1.747663 -0.446165 -5 -1.0
2013-01-03 -0.108684 -0.698606 -0.294223 -5 -2.0
2013-01-04 -0.870050 -0.162659 -1.842476 -5 -3.0
2013-01-05 -1.586294 -0.369570 -0.887738 -5 -4.0
2013-01-06 -0.603919 -0.292847 -0.351932 -5 -5.0
In [55]:
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
In [56]:
df1.loc[dates[0]:dates[1],'E'] = 1
In [57]:
df1
Out[57]:
A B C D F E
2013-01-01 0.000000 0.000000 0.602767 5 NaN 1.0
2013-01-02 -1.012344 -1.747663 -0.446165 5 1.0 1.0
2013-01-03 0.108684 -0.698606 0.294223 5 2.0 NaN
2013-01-04 0.870050 -0.162659 -1.842476 5 3.0 NaN
In [58]:
df1.dropna(how='any')
Out[58]:
A B C D F E
2013-01-02 -1.012344 -1.747663 -0.446165 5 1.0 1.0
In [59]:
df1.fillna(value=5)
Out[59]:
A B C D F E
2013-01-01 0.000000 0.000000 0.602767 5 5.0 1.0
2013-01-02 -1.012344 -1.747663 -0.446165 5 1.0 1.0
2013-01-03 0.108684 -0.698606 0.294223 5 2.0 5.0
2013-01-04 0.870050 -0.162659 -1.842476 5 3.0 5.0
In [60]:
pd.isnull(df1)
Out[60]:
A B C D F E
2013-01-01 False False False False True False
2013-01-02 False False False False False False
2013-01-03 False False False False False True
2013-01-04 False False False False False True
In [61]:
df.mean()
Out[61]:
A    0.359434
B   -0.545224
C   -0.142641
D    5.000000
F    3.000000
dtype: float64
In [62]:
df.mean(1)
Out[62]:
2013-01-01    1.400692
2013-01-02    0.558765
2013-01-03    1.340860
2013-01-04    1.372983
2013-01-05    2.220892
2013-01-06    1.991828
Freq: D, dtype: float64
In [63]:
s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)
In [64]:
s
Out[64]:
2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    3.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64
In [65]:
df.sub(s, axis='index')
Out[65]:
A B C D F
2013-01-01 NaN NaN NaN NaN NaN
2013-01-02 NaN NaN NaN NaN NaN
2013-01-03 -0.891316 -1.698606 -0.705777 4.0 1.0
2013-01-04 -2.129950 -3.162659 -4.842476 2.0 0.0
2013-01-05 -3.413706 -5.369570 -4.112262 0.0 -1.0
2013-01-06 NaN NaN NaN NaN NaN
In [66]:
df.apply(np.cumsum)
Out[66]:
A B C D F
2013-01-01 0.000000 0.000000 0.602767 5 NaN
2013-01-02 -1.012344 -1.747663 0.156602 10 1.0
2013-01-03 -0.903661 -2.446269 0.450825 15 3.0
2013-01-04 -0.033610 -2.608928 -1.391651 20 6.0
2013-01-05 1.552684 -2.978498 -0.503913 25 10.0
2013-01-06 2.156603 -3.271345 -0.855845 30 15.0
In [67]:
df.apply(lambda x: x.max() - x.min())
Out[67]:
A    2.598639
B    1.747663
C    2.730214
D    0.000000
F    4.000000
dtype: float64
In [68]:
s = pd.Series(np.random.randint(0, 7, size=10))
In [69]:
s
Out[69]:
0    1
1    3
2    1
3    6
4    6
5    6
6    2
7    6
8    1
9    2
dtype: int32
In [70]:
s.value_counts()
Out[70]:
6    4
1    3
2    2
3    1
dtype: int64
In [71]:
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
In [72]:
s.str.lower()
Out[72]:
0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object
In [73]:
df = pd.DataFrame(np.random.randn(10, 4))
In [74]:
df
Out[74]:
0 1 2 3
0 0.087190 0.378057 1.230033 -1.916034
1 1.845394 -0.493289 1.341151 -0.312285
2 -1.414530 0.567152 -1.266626 -0.824010
3 0.085407 1.743999 1.059148 0.967338
4 1.968624 -0.456669 -1.170885 0.144196
5 -0.095139 -0.307934 1.317358 -0.744105
6 -1.557237 -1.185375 0.840714 1.070566
7 0.049833 -0.967053 1.201201 -0.469980
8 0.782234 0.027085 0.573083 1.116846
9 1.293508 1.188724 -1.044702 -0.973416
In [75]:
pieces = [df[:3], df[3:7], df[7:]]
In [76]:
pd.concat(pieces)
Out[76]:
0 1 2 3
0 0.087190 0.378057 1.230033 -1.916034
1 1.845394 -0.493289 1.341151 -0.312285
2 -1.414530 0.567152 -1.266626 -0.824010
3 0.085407 1.743999 1.059148 0.967338
4 1.968624 -0.456669 -1.170885 0.144196
5 -0.095139 -0.307934 1.317358 -0.744105
6 -1.557237 -1.185375 0.840714 1.070566
7 0.049833 -0.967053 1.201201 -0.469980
8 0.782234 0.027085 0.573083 1.116846
9 1.293508 1.188724 -1.044702 -0.973416
In [77]:
left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
In [78]:
right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})
In [79]:
left
Out[79]:
key lval
0 foo 1
1 foo 2
In [80]:
right
Out[80]:
key rval
0 foo 4
1 foo 5
In [81]:
pd.merge(left, right, on='key')
Out[81]:
key lval rval
0 foo 1 4
1 foo 1 5
2 foo 2 4
3 foo 2 5
In [82]:
df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])
In [83]:
df
Out[83]:
A B C D
0 -1.137633 1.190761 -1.589958 0.274725
1 -1.182860 -0.964490 0.701884 -0.209471
2 1.708852 -0.926927 -1.044458 -1.501952
3 0.956593 0.605129 0.929932 0.382138
4 0.443152 0.795685 -1.092606 0.384814
5 0.570427 0.134011 0.425411 0.672966
6 0.557559 -1.400583 1.328377 -1.237753
7 2.263220 0.883696 1.772972 -1.388512
In [84]:
s = df.iloc[3]
In [85]:
df.append(s, ignore_index=True)
Out[85]:
A B C D
0 -1.137633 1.190761 -1.589958 0.274725
1 -1.182860 -0.964490 0.701884 -0.209471
2 1.708852 -0.926927 -1.044458 -1.501952
3 0.956593 0.605129 0.929932 0.382138
4 0.443152 0.795685 -1.092606 0.384814
5 0.570427 0.134011 0.425411 0.672966
6 0.557559 -1.400583 1.328377 -1.237753
7 2.263220 0.883696 1.772972 -1.388512
8 0.956593 0.605129 0.929932 0.382138
In [86]:
df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                   'B' : ['one', 'one', 'two', 'three',
                           'two', 'two', 'one', 'three'],
                   'C' : np.random.randn(8),
                   'D' : np.random.randn(8)})
In [87]:
df
Out[87]:
A B C D
0 foo one 0.135346 -0.916001
1 bar one -0.907362 0.550130
2 foo two -1.758074 0.264998
3 bar three 0.153849 0.566296
4 foo two -1.033947 0.551321
5 bar two 0.494596 -0.859861
6 foo one 0.347631 0.106605
7 foo three 0.785117 -0.138477
In [88]:
df.groupby('A').sum()
Out[88]:
C D
A
bar -0.258916 0.256565
foo -1.523927 -0.131554
In [89]:
df.groupby(['A','B']).sum()
Out[89]:
C D
A B
bar one -0.907362 0.550130
three 0.153849 0.566296
two 0.494596 -0.859861
foo one 0.482977 -0.809396
three 0.785117 -0.138477
two -2.792021 0.816319
In [90]:
tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', 
                     'foo', 'foo', 'qux', 'qux'],
                    ['one', 'two', 'one', 'two',
                      'one', 'two', 'one', 'two']]))
In [91]:
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
In [92]:
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])
In [93]:
df2 = df[:4]
In [94]:
df2
Out[94]:
A B
first second
bar one -0.723323 0.071984
two -1.068435 -0.485561
baz one 0.018726 -0.379056
two 2.051579 -0.789019
In [95]:
stacked = df2.stack()
In [96]:
stacked
Out[96]:
first  second   
bar    one     A   -0.723323
               B    0.071984
       two     A   -1.068435
               B   -0.485561
baz    one     A    0.018726
               B   -0.379056
       two     A    2.051579
               B   -0.789019
dtype: float64
In [97]:
stacked.unstack()
Out[97]:
A B
first second
bar one -0.723323 0.071984
two -1.068435 -0.485561
baz one 0.018726 -0.379056
two 2.051579 -0.789019
In [98]:
stacked.unstack(1)
Out[98]:
second one two
first
bar A -0.723323 -1.068435
B 0.071984 -0.485561
baz A 0.018726 2.051579
B -0.379056 -0.789019
In [99]:
stacked.unstack(0)
Out[99]:
first bar baz
second
one A -0.723323 0.018726
B 0.071984 -0.379056
two A -1.068435 2.051579
B -0.485561 -0.789019
In [100]:
df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
                   'B' : ['A', 'B', 'C'] * 4,
                   'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                   'D' : np.random.randn(12),
                   'E' : np.random.randn(12)})
In [101]:
df
Out[101]:
A B C D E
0 one A foo -0.435141 -0.012559
1 one B foo 1.419821 1.676063
2 two C foo -0.844238 -0.248897
3 three A bar 0.291280 1.768196
4 one B bar -0.559366 -1.223321
5 one C bar -1.111149 0.599756
6 two A foo -0.980251 0.168256
7 three B foo 0.596242 -0.515708
8 one C foo -0.445593 0.528068
9 one A bar 0.344460 -0.199001
10 two B bar -1.455970 -0.209612
11 three C bar 0.406563 0.340673
In [102]:
pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])
Out[102]:
C bar foo
A B
one A 0.344460 -0.435141
B -0.559366 1.419821
C -1.111149 -0.445593
three A 0.291280 NaN
B NaN 0.596242
C 0.406563 NaN
two A NaN -0.980251
B -1.455970 NaN
C NaN -0.844238
In [103]:
rng = pd.date_range('1/1/2012', periods=100, freq='S')
In [104]:
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
In [105]:
ts.resample('5Min').sum()
Out[105]:
2012-01-01    26177
Freq: 5T, dtype: int32
In [106]:
rng = pd.date_range('3/6/2012 00:00', periods=5, freq='D')
In [107]:
ts = pd.Series(np.random.randn(len(rng)), rng)
In [108]:
ts
Out[108]:
2012-03-06    1.964786
2012-03-07   -0.086269
2012-03-08    0.017538
2012-03-09   -0.128088
2012-03-10   -0.657546
Freq: D, dtype: float64
In [109]:
ts_utc = ts.tz_localize('UTC')
In [110]:
ts_utc
Out[110]:
2012-03-06 00:00:00+00:00    1.964786
2012-03-07 00:00:00+00:00   -0.086269
2012-03-08 00:00:00+00:00    0.017538
2012-03-09 00:00:00+00:00   -0.128088
2012-03-10 00:00:00+00:00   -0.657546
Freq: D, dtype: float64
In [111]:
ts_utc.tz_convert('US/Eastern')
Out[111]:
2012-03-05 19:00:00-05:00    1.964786
2012-03-06 19:00:00-05:00   -0.086269
2012-03-07 19:00:00-05:00    0.017538
2012-03-08 19:00:00-05:00   -0.128088
2012-03-09 19:00:00-05:00   -0.657546
Freq: D, dtype: float64
In [112]:
rng = pd.date_range('1/1/2012', periods=5, freq='M')
In [113]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)
In [114]:
ts
Out[114]:
2012-01-31    0.558833
2012-02-29   -0.213529
2012-03-31   -0.934557
2012-04-30   -1.941257
2012-05-31   -0.705337
Freq: M, dtype: float64
In [115]:
ps = ts.to_period()
In [116]:
ps
Out[116]:
2012-01    0.558833
2012-02   -0.213529
2012-03   -0.934557
2012-04   -1.941257
2012-05   -0.705337
Freq: M, dtype: float64
In [117]:
ps.to_timestamp()
Out[117]:
2012-01-01    0.558833
2012-02-01   -0.213529
2012-03-01   -0.934557
2012-04-01   -1.941257
2012-05-01   -0.705337
Freq: MS, dtype: float64
In [118]:
prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')
In [119]:
ts = pd.Series(np.random.randn(len(prng)), prng)
In [120]:
ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9
In [121]:
ts.head()
Out[121]:
1990-03-01 09:00    0.612225
1990-06-01 09:00    0.125501
1990-09-01 09:00   -0.647144
1990-12-01 09:00    0.858475
1991-03-01 09:00    0.408605
Freq: H, dtype: float64
In [122]:
df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
In [123]:
df["grade"] = df["raw_grade"].astype("category")
In [124]:
df["grade"]
Out[124]:
0    a
1    b
2    b
3    a
4    a
5    e
Name: grade, dtype: category
Categories (3, object): [a, b, e]
In [125]:
df["grade"].cat.categories = ["very good", "good", "very bad"]
In [126]:
df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"])
In [127]:
df["grade"]
Out[127]:
0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (5, object): [very bad, bad, medium, good, very good]
In [128]:
df.sort_values(by="grade")
Out[128]:
id raw_grade grade
5 6 e very bad
1 2 b good
2 3 b good
0 1 a very good
3 4 a very good
4 5 a very good
In [129]:
df.groupby("grade").size()
Out[129]:
grade
very bad     1
bad          0
medium       0
good         2
very good    3
dtype: int64
In [130]:
ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
In [131]:
ts = ts.cumsum()
In [132]:
ts.plot()
Out[132]:
<matplotlib.axes._subplots.AxesSubplot at 0x8a81b00>
In [133]:
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
                  columns=['A', 'B', 'C', 'D'])
In [134]:
df = df.cumsum()
In [135]:
plt.figure(); df.plot(); plt.legend(loc='best')
Out[135]:
<matplotlib.legend.Legend at 0x8cebf98>
<matplotlib.figure.Figure at 0x8d0d208>
In [136]:
df.to_csv('foo.csv')
In [137]:
pd.read_csv('foo.csv')
Out[137]:
Unnamed: 0 A B C D
0 2000-01-01 -0.030936 0.895485 0.860234 0.436905
1 2000-01-02 0.462136 1.529305 1.465795 0.275320
2 2000-01-03 1.347457 2.146237 1.025922 1.807516
3 2000-01-04 1.762626 1.617238 2.269617 2.750168
4 2000-01-05 0.752935 -0.573293 2.496633 2.555544
5 2000-01-06 0.559531 -1.075933 3.604460 2.045109
6 2000-01-07 2.864227 2.116665 4.646330 0.552505
7 2000-01-08 2.790815 1.112905 6.876355 0.842887
8 2000-01-09 2.781517 -0.038389 7.562739 -0.677093
9 2000-01-10 3.277815 0.783239 8.625772 -1.210071
10 2000-01-11 2.671833 1.628914 9.013839 -0.878836
11 2000-01-12 3.358392 1.286629 7.869067 0.927850
12 2000-01-13 2.934755 1.281981 8.026394 0.944987
13 2000-01-14 3.855528 1.044585 7.611520 -0.030036
14 2000-01-15 3.197158 1.342844 7.764532 0.003429
15 2000-01-16 3.397053 -0.159531 8.506764 0.788280
16 2000-01-17 2.604527 0.237801 9.856556 1.040767
17 2000-01-18 5.084828 0.499864 9.658846 2.005989
18 2000-01-19 4.106095 -1.350089 9.143522 1.686732
19 2000-01-20 4.175043 -1.190392 8.058163 0.787240
20 2000-01-21 5.185731 -0.777054 8.812265 0.419670
21 2000-01-22 4.371896 0.233903 8.661171 0.965343
22 2000-01-23 4.828027 -1.879995 9.411272 0.913708
23 2000-01-24 5.439256 -1.489195 9.325441 -0.105687
24 2000-01-25 4.692312 -2.057943 11.866773 0.281276
25 2000-01-26 3.076197 -1.366478 13.091266 1.299899
26 2000-01-27 3.061897 -1.074740 12.495540 2.413354
27 2000-01-28 3.331420 -1.146924 12.470048 4.507037
28 2000-01-29 2.539132 -2.027100 13.117951 4.890443
29 2000-01-30 3.063721 -0.814003 13.669964 4.517990
... ... ... ... ... ...
970 2002-08-28 0.284504 -22.271339 15.981331 15.047098
971 2002-08-29 1.179390 -21.201687 15.868212 14.459094
972 2002-08-30 -1.064233 -22.113170 15.675313 14.426811
973 2002-08-31 -0.204647 -21.770136 16.291319 16.018532
974 2002-09-01 1.565074 -22.972256 16.085625 17.164091
975 2002-09-02 1.594709 -22.544796 15.194863 16.747902
976 2002-09-03 2.058587 -23.616482 16.108197 15.722705
977 2002-09-04 2.204938 -24.498805 17.115751 16.182108
978 2002-09-05 2.544514 -25.655447 17.757754 13.716535
979 2002-09-06 0.042641 -26.231512 16.933801 14.481075
980 2002-09-07 0.151732 -23.564705 18.606103 15.288614
981 2002-09-08 -1.103293 -22.145037 19.017814 14.003808
982 2002-09-09 -0.886503 -23.226622 18.262366 14.134913
983 2002-09-10 -0.460123 -24.301707 18.189121 12.835480
984 2002-09-11 -0.828844 -25.875458 19.843507 12.759591
985 2002-09-12 -0.517894 -25.686486 21.261322 11.394933
986 2002-09-13 0.184588 -25.469125 21.141841 11.325127
987 2002-09-14 1.841196 -23.770084 21.204105 11.307507
988 2002-09-15 3.223569 -23.631714 20.283344 11.293488
989 2002-09-16 3.391174 -22.155609 19.469558 11.086509
990 2002-09-17 2.899235 -21.962757 20.295803 10.123460
991 2002-09-18 1.712088 -20.568953 21.282493 10.251396
992 2002-09-19 3.281950 -21.733617 20.914229 8.739577
993 2002-09-20 2.324456 -21.464629 20.747934 8.104013
994 2002-09-21 3.534397 -21.065582 18.900106 9.929498
995 2002-09-22 2.547043 -21.121919 19.291717 9.657817
996 2002-09-23 3.967868 -20.993894 19.146004 8.970358
997 2002-09-24 3.223028 -21.249086 19.858325 9.268188
998 2002-09-25 3.895291 -21.572320 21.571366 10.031764
999 2002-09-26 3.295146 -20.708933 22.339290 10.840932

1000 rows × 5 columns

In [138]:
df.to_hdf('foo.h5','df')
In [139]:
pd.read_hdf('foo.h5','df')
Out[139]:
A B C D
2000-01-01 -0.030936 0.895485 0.860234 0.436905
2000-01-02 0.462136 1.529305 1.465795 0.275320
2000-01-03 1.347457 2.146237 1.025922 1.807516
2000-01-04 1.762626 1.617238 2.269617 2.750168
2000-01-05 0.752935 -0.573293 2.496633 2.555544
2000-01-06 0.559531 -1.075933 3.604460 2.045109
2000-01-07 2.864227 2.116665 4.646330 0.552505
2000-01-08 2.790815 1.112905 6.876355 0.842887
2000-01-09 2.781517 -0.038389 7.562739 -0.677093
2000-01-10 3.277815 0.783239 8.625772 -1.210071
2000-01-11 2.671833 1.628914 9.013839 -0.878836
2000-01-12 3.358392 1.286629 7.869067 0.927850
2000-01-13 2.934755 1.281981 8.026394 0.944987
2000-01-14 3.855528 1.044585 7.611520 -0.030036
2000-01-15 3.197158 1.342844 7.764532 0.003429
2000-01-16 3.397053 -0.159531 8.506764 0.788280
2000-01-17 2.604527 0.237801 9.856556 1.040767
2000-01-18 5.084828 0.499864 9.658846 2.005989
2000-01-19 4.106095 -1.350089 9.143522 1.686732
2000-01-20 4.175043 -1.190392 8.058163 0.787240
2000-01-21 5.185731 -0.777054 8.812265 0.419670
2000-01-22 4.371896 0.233903 8.661171 0.965343
2000-01-23 4.828027 -1.879995 9.411272 0.913708
2000-01-24 5.439256 -1.489195 9.325441 -0.105687
2000-01-25 4.692312 -2.057943 11.866773 0.281276
2000-01-26 3.076197 -1.366478 13.091266 1.299899
2000-01-27 3.061897 -1.074740 12.495540 2.413354
2000-01-28 3.331420 -1.146924 12.470048 4.507037
2000-01-29 2.539132 -2.027100 13.117951 4.890443
2000-01-30 3.063721 -0.814003 13.669964 4.517990
... ... ... ... ...
2002-08-28 0.284504 -22.271339 15.981331 15.047098
2002-08-29 1.179390 -21.201687 15.868212 14.459094
2002-08-30 -1.064233 -22.113170 15.675313 14.426811
2002-08-31 -0.204647 -21.770136 16.291319 16.018532
2002-09-01 1.565074 -22.972256 16.085625 17.164091
2002-09-02 1.594709 -22.544796 15.194863 16.747902
2002-09-03 2.058587 -23.616482 16.108197 15.722705
2002-09-04 2.204938 -24.498805 17.115751 16.182108
2002-09-05 2.544514 -25.655447 17.757754 13.716535
2002-09-06 0.042641 -26.231512 16.933801 14.481075
2002-09-07 0.151732 -23.564705 18.606103 15.288614
2002-09-08 -1.103293 -22.145037 19.017814 14.003808
2002-09-09 -0.886503 -23.226622 18.262366 14.134913
2002-09-10 -0.460123 -24.301707 18.189121 12.835480
2002-09-11 -0.828844 -25.875458 19.843507 12.759591
2002-09-12 -0.517894 -25.686486 21.261322 11.394933
2002-09-13 0.184588 -25.469125 21.141841 11.325127
2002-09-14 1.841196 -23.770084 21.204105 11.307507
2002-09-15 3.223569 -23.631714 20.283344 11.293488
2002-09-16 3.391174 -22.155609 19.469558 11.086509
2002-09-17 2.899235 -21.962757 20.295803 10.123460
2002-09-18 1.712088 -20.568953 21.282493 10.251396
2002-09-19 3.281950 -21.733617 20.914229 8.739577
2002-09-20 2.324456 -21.464629 20.747934 8.104013
2002-09-21 3.534397 -21.065582 18.900106 9.929498
2002-09-22 2.547043 -21.121919 19.291717 9.657817
2002-09-23 3.967868 -20.993894 19.146004 8.970358
2002-09-24 3.223028 -21.249086 19.858325 9.268188
2002-09-25 3.895291 -21.572320 21.571366 10.031764
2002-09-26 3.295146 -20.708933 22.339290 10.840932

1000 rows × 4 columns

In [140]:
df.to_excel('foo.xlsx', sheet_name='Sheet1')
In [141]:
pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA'])
Out[141]:
A B C D
2000-01-01 -0.030936 0.895485 0.860234 0.436905
2000-01-02 0.462136 1.529305 1.465795 0.275320
2000-01-03 1.347457 2.146237 1.025922 1.807516
2000-01-04 1.762626 1.617238 2.269617 2.750168
2000-01-05 0.752935 -0.573293 2.496633 2.555544
2000-01-06 0.559531 -1.075933 3.604460 2.045109
2000-01-07 2.864227 2.116665 4.646330 0.552505
2000-01-08 2.790815 1.112905 6.876355 0.842887
2000-01-09 2.781517 -0.038389 7.562739 -0.677093
2000-01-10 3.277815 0.783239 8.625772 -1.210071
2000-01-11 2.671833 1.628914 9.013839 -0.878836
2000-01-12 3.358392 1.286629 7.869067 0.927850
2000-01-13 2.934755 1.281981 8.026394 0.944987
2000-01-14 3.855528 1.044585 7.611520 -0.030036
2000-01-15 3.197158 1.342844 7.764532 0.003429
2000-01-16 3.397053 -0.159531 8.506764 0.788280
2000-01-17 2.604527 0.237801 9.856556 1.040767
2000-01-18 5.084828 0.499864 9.658846 2.005989
2000-01-19 4.106095 -1.350089 9.143522 1.686732
2000-01-20 4.175043 -1.190392 8.058163 0.787240
2000-01-21 5.185731 -0.777054 8.812265 0.419670
2000-01-22 4.371896 0.233903 8.661171 0.965343
2000-01-23 4.828027 -1.879995 9.411272 0.913708
2000-01-24 5.439256 -1.489195 9.325441 -0.105687
2000-01-25 4.692312 -2.057943 11.866773 0.281276
2000-01-26 3.076197 -1.366478 13.091266 1.299899
2000-01-27 3.061897 -1.074740 12.495540 2.413354
2000-01-28 3.331420 -1.146924 12.470048 4.507037
2000-01-29 2.539132 -2.027100 13.117951 4.890443
2000-01-30 3.063721 -0.814003 13.669964 4.517990
... ... ... ... ...
2002-08-28 0.284504 -22.271339 15.981331 15.047098
2002-08-29 1.179390 -21.201687 15.868212 14.459094
2002-08-30 -1.064233 -22.113170 15.675313 14.426811
2002-08-31 -0.204647 -21.770136 16.291319 16.018532
2002-09-01 1.565074 -22.972256 16.085625 17.164091
2002-09-02 1.594709 -22.544796 15.194863 16.747902
2002-09-03 2.058587 -23.616482 16.108197 15.722705
2002-09-04 2.204938 -24.498805 17.115751 16.182108
2002-09-05 2.544514 -25.655447 17.757754 13.716535
2002-09-06 0.042641 -26.231512 16.933801 14.481075
2002-09-07 0.151732 -23.564705 18.606103 15.288614
2002-09-08 -1.103293 -22.145037 19.017814 14.003808
2002-09-09 -0.886503 -23.226622 18.262366 14.134913
2002-09-10 -0.460123 -24.301707 18.189121 12.835480
2002-09-11 -0.828844 -25.875458 19.843507 12.759591
2002-09-12 -0.517894 -25.686486 21.261322 11.394933
2002-09-13 0.184588 -25.469125 21.141841 11.325127
2002-09-14 1.841196 -23.770084 21.204105 11.307507
2002-09-15 3.223569 -23.631714 20.283344 11.293488
2002-09-16 3.391174 -22.155609 19.469558 11.086509
2002-09-17 2.899235 -21.962757 20.295803 10.123460
2002-09-18 1.712088 -20.568953 21.282493 10.251396
2002-09-19 3.281950 -21.733617 20.914229 8.739577
2002-09-20 2.324456 -21.464629 20.747934 8.104013
2002-09-21 3.534397 -21.065582 18.900106 9.929498
2002-09-22 2.547043 -21.121919 19.291717 9.657817
2002-09-23 3.967868 -20.993894 19.146004 8.970358
2002-09-24 3.223028 -21.249086 19.858325 9.268188
2002-09-25 3.895291 -21.572320 21.571366 10.031764
2002-09-26 3.295146 -20.708933 22.339290 10.840932

1000 rows × 4 columns