Pandas Cookbook




Pay Notebook Creator: Elaine Chan0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [1]:
#crosscompute
In [2]:
import pandas as pd
import numpy as np
In [3]:
df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
Out[3]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
In [4]:
df.loc[df.AAA >= 5, 'BBB'] = 10000; df
Out[4]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 10000 50
2 6 10000 -30
3 7 10000 -50
In [5]:
df.loc[df.AAA >= 5, ['BBB', 'CCC']] = 55555; df
Out[5]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 55555 55555
2 6 55555 55555
3 7 55555 55555
In [6]:
df.loc[df.AAA < 5, ['BBB', 'CCC']] = -111111; df
Out[6]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 -111111 -111111
1 5 55555 55555
2 6 55555 55555
3 7 55555 55555
In [7]:
# boolean mask
mask = pd.DataFrame({'AAA' : [True] * 4, 'BBB' : [False] * 4,'CCC' : [True,False] * 2}); mask
Out[7]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 True False True
1 True False False
2 True False True
3 True False False
In [8]:
df.where(mask, -1000)
Out[8]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 -1000 -111111
1 5 -1000 -1000
2 6 -1000 55555
3 7 -1000 -1000
In [9]:
df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
Out[9]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
In [10]:
df['logic'] = np.where(df['AAA'] > 5, 'high', 'low'); df
Out[10]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC logic
0 4 10 100 low
1 5 20 50 low
2 6 30 -30 high
3 7 40 -50 high
In [11]:
df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
Out[11]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
In [12]:
low = df[df.AAA <=5]; low
Out[12]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
In [13]:
high = df[df.AAA >5]; high
Out[13]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
2 6 30 -30
3 7 40 -50
In [14]:
df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
Out[14]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
In [15]:
newseries = df.loc[(df['BBB'] < 25) & (df['CCC'] >= -40), 'AAA']; newseries
Out[15]:
0    4
1    5
Name: AAA, dtype: int64
In [16]:
type(newseries)
Out[16]:
pandas.core.series.Series
In [17]:
newseries = df.loc[(df['BBB'] > 25) | (df['CCC'] >= -40), 'AAA']; newseries
Out[17]:
0    4
1    5
2    6
3    7
Name: AAA, dtype: int64
In [18]:
df.loc[(df['BBB'] > 25) | (df['CCC'] >= -40), 'AAA'] = 0.01; df
Out[18]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 0.01 10 100
1 0.01 20 50
2 0.01 30 -30
3 0.01 40 -50
In [19]:
df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
Out[19]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
In [20]:
val = 4.0
In [21]:
df.loc[(df.CCC - val).abs().argsort()]
Out[21]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
2 6 30 -30
1 5 20 50
3 7 40 -50
0 4 10 100
In [22]:
df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
Out[22]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
In [23]:
df.loc[(df['BBB'] > 25) | (df.CCC >= -40), 'AAA'] = 0.01; df
Out[23]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 0.01 10 100
1 0.01 20 50
2 0.01 30 -30
3 0.01 40 -50
In [24]:
df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
Out[24]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
In [25]:
df.loc[(df.BBB > 25) | (df.CCC >= -40), 'AAA'] = 0.01; df
Out[25]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 0.01 10 100
1 0.01 20 50
2 0.01 30 -30
3 0.01 40 -50
In [26]:
df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
Out[26]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
In [27]:
c1 = df.AAA <= 5.5
c2 = df.BBB == 10.0
c3 = df.CCC > -40.0
In [28]:
allc = c1 & c2 & c3
In [29]:
df[allc]
Out[29]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
In [30]:
import functools
clist = [c1,c2,c3]
allcrit = functools.reduce(lambda x,y: x & y, clist)
df[allcrit]
Out[30]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
In [31]:
df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
Out[31]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
In [32]:
data = {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}
In [33]:
df = pd.DataFrame(data=data, index=['foo','bar','dog','park']); df
Out[33]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
foo 4 10 100
bar 5 20 50
dog 6 30 -30
park 7 40 -50
In [34]:
df.loc['bar':'park'] # label oriented slicing
Out[34]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
bar 5 20 50
dog 6 30 -30
park 7 40 -50
In [35]:
df.iloc[0:3] # position oriented
Out[35]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
foo 4 10 100
bar 5 20 50
dog 6 30 -30
In [36]:
df2 = pd.DataFrame(data=data,index=[1,2,3,4]); df2
Out[36]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
1 4 10 100
2 5 20 50
3 6 30 -30
4 7 40 -50
In [37]:
df2.iloc[1:3] # position oriented
Out[37]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
2 5 20 50
3 6 30 -30
In [38]:
df2.loc[1:3]
Out[38]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
1 4 10 100
2 5 20 50
3 6 30 -30
In [39]:
df = pd.DataFrame({'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40], 'CCC' : [100,50,-30,-50]}); df
Out[39]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
In [40]:
df.index
Out[40]:
RangeIndex(start=0, stop=4, step=1)
In [41]:
df.index.isin([0,1,3])
Out[41]:
array([ True,  True, False,  True])
In [42]:
df[~((df.AAA <=6) & (df.index.isin([0,2,4])))]
Out[42]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
1 5 20 50
3 7 40 -50
In [43]:
df[((df.AAA <=6) & (df.index.isin([0,2,4])))]
Out[43]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
2 6 30 -30
In [44]:
df[(df.AAA <=6)]
Out[44]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
In [45]:
df[~(df.AAA <=6)]
Out[45]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
3 7 40 -50
In [46]:
rng = pd.date_range('1/1/2013', periods=100, freq='D')
In [47]:
rng
Out[47]:
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
               '2013-01-09', '2013-01-10', '2013-01-11', '2013-01-12',
               '2013-01-13', '2013-01-14', '2013-01-15', '2013-01-16',
               '2013-01-17', '2013-01-18', '2013-01-19', '2013-01-20',
               '2013-01-21', '2013-01-22', '2013-01-23', '2013-01-24',
               '2013-01-25', '2013-01-26', '2013-01-27', '2013-01-28',
               '2013-01-29', '2013-01-30', '2013-01-31', '2013-02-01',
               '2013-02-02', '2013-02-03', '2013-02-04', '2013-02-05',
               '2013-02-06', '2013-02-07', '2013-02-08', '2013-02-09',
               '2013-02-10', '2013-02-11', '2013-02-12', '2013-02-13',
               '2013-02-14', '2013-02-15', '2013-02-16', '2013-02-17',
               '2013-02-18', '2013-02-19', '2013-02-20', '2013-02-21',
               '2013-02-22', '2013-02-23', '2013-02-24', '2013-02-25',
               '2013-02-26', '2013-02-27', '2013-02-28', '2013-03-01',
               '2013-03-02', '2013-03-03', '2013-03-04', '2013-03-05',
               '2013-03-06', '2013-03-07', '2013-03-08', '2013-03-09',
               '2013-03-10', '2013-03-11', '2013-03-12', '2013-03-13',
               '2013-03-14', '2013-03-15', '2013-03-16', '2013-03-17',
               '2013-03-18', '2013-03-19', '2013-03-20', '2013-03-21',
               '2013-03-22', '2013-03-23', '2013-03-24', '2013-03-25',
               '2013-03-26', '2013-03-27', '2013-03-28', '2013-03-29',
               '2013-03-30', '2013-03-31', '2013-04-01', '2013-04-02',
               '2013-04-03', '2013-04-04', '2013-04-05', '2013-04-06',
               '2013-04-07', '2013-04-08', '2013-04-09', '2013-04-10'],
              dtype='datetime64[ns]', freq='D')
In [48]:
w_rng = pd.date_range('1/1/2013', periods=100, freq='W'); w_rng
Out[48]:
DatetimeIndex(['2013-01-06', '2013-01-13', '2013-01-20', '2013-01-27',
               '2013-02-03', '2013-02-10', '2013-02-17', '2013-02-24',
               '2013-03-03', '2013-03-10', '2013-03-17', '2013-03-24',
               '2013-03-31', '2013-04-07', '2013-04-14', '2013-04-21',
               '2013-04-28', '2013-05-05', '2013-05-12', '2013-05-19',
               '2013-05-26', '2013-06-02', '2013-06-09', '2013-06-16',
               '2013-06-23', '2013-06-30', '2013-07-07', '2013-07-14',
               '2013-07-21', '2013-07-28', '2013-08-04', '2013-08-11',
               '2013-08-18', '2013-08-25', '2013-09-01', '2013-09-08',
               '2013-09-15', '2013-09-22', '2013-09-29', '2013-10-06',
               '2013-10-13', '2013-10-20', '2013-10-27', '2013-11-03',
               '2013-11-10', '2013-11-17', '2013-11-24', '2013-12-01',
               '2013-12-08', '2013-12-15', '2013-12-22', '2013-12-29',
               '2014-01-05', '2014-01-12', '2014-01-19', '2014-01-26',
               '2014-02-02', '2014-02-09', '2014-02-16', '2014-02-23',
               '2014-03-02', '2014-03-09', '2014-03-16', '2014-03-23',
               '2014-03-30', '2014-04-06', '2014-04-13', '2014-04-20',
               '2014-04-27', '2014-05-04', '2014-05-11', '2014-05-18',
               '2014-05-25', '2014-06-01', '2014-06-08', '2014-06-15',
               '2014-06-22', '2014-06-29', '2014-07-06', '2014-07-13',
               '2014-07-20', '2014-07-27', '2014-08-03', '2014-08-10',
               '2014-08-17', '2014-08-24', '2014-08-31', '2014-09-07',
               '2014-09-14', '2014-09-21', '2014-09-28', '2014-10-05',
               '2014-10-12', '2014-10-19', '2014-10-26', '2014-11-02',
               '2014-11-09', '2014-11-16', '2014-11-23', '2014-11-30'],
              dtype='datetime64[ns]', freq='W-SUN')
In [49]:
m_rng = pd.date_range('1/1/2013', periods=100, freq='M'); m_rng
Out[49]:
DatetimeIndex(['2013-01-31', '2013-02-28', '2013-03-31', '2013-04-30',
               '2013-05-31', '2013-06-30', '2013-07-31', '2013-08-31',
               '2013-09-30', '2013-10-31', '2013-11-30', '2013-12-31',
               '2014-01-31', '2014-02-28', '2014-03-31', '2014-04-30',
               '2014-05-31', '2014-06-30', '2014-07-31', '2014-08-31',
               '2014-09-30', '2014-10-31', '2014-11-30', '2014-12-31',
               '2015-01-31', '2015-02-28', '2015-03-31', '2015-04-30',
               '2015-05-31', '2015-06-30', '2015-07-31', '2015-08-31',
               '2015-09-30', '2015-10-31', '2015-11-30', '2015-12-31',
               '2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31',
               '2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30',
               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
               '2017-09-30', '2017-10-31', '2017-11-30', '2017-12-31',
               '2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
               '2018-05-31', '2018-06-30', '2018-07-31', '2018-08-31',
               '2018-09-30', '2018-10-31', '2018-11-30', '2018-12-31',
               '2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
               '2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31',
               '2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31',
               '2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30'],
              dtype='datetime64[ns]', freq='M')
In [50]:
data = np.random.randn(100,4); data
Out[50]:
array([[ 7.91595382e-01, -1.34911137e+00, -3.45966129e-01,
        -9.89117739e-01],
       [-7.55117120e-01,  3.86107083e-01, -1.99550994e+00,
         2.95347100e-01],
       [ 7.47017252e-01, -6.02029109e-01,  1.71111884e-01,
        -1.57509969e+00],
       [-5.16566355e-01, -2.77683345e-02, -1.00278194e-01,
        -6.19958803e-01],
       [-1.50033222e+00,  1.42237695e+00, -6.85413044e-01,
        -5.86878525e-01],
       [ 1.58846176e+00, -1.47597928e-02,  1.02220862e+00,
        -5.89081289e-01],
       [ 7.20386038e-02, -1.31888587e+00,  1.01129242e-01,
         2.04637741e+00],
       [-1.21601238e-01, -6.13962551e-03,  1.52411555e+00,
        -3.34470424e-01],
       [-7.59254864e-01,  6.62075019e-01,  1.30173581e+00,
        -3.71449352e-01],
       [-7.58248484e-01,  8.81605517e-01, -1.02718873e+00,
        -1.58124232e+00],
       [ 7.24201462e-01,  8.44448716e-01, -1.75234248e-01,
        -1.12020800e-01],
       [-6.73994935e-02,  1.32993501e+00, -1.78561049e+00,
         5.30985491e-01],
       [ 1.05919659e+00, -2.11618315e+00, -1.16732285e+00,
         4.49637288e-01],
       [-6.71372661e-01, -1.85050458e+00,  9.76720667e-01,
        -2.82148038e-01],
       [-9.50187297e-02, -1.69343436e-01,  8.69733484e-01,
         7.75368874e-01],
       [ 3.20651509e-01,  4.05639581e-01, -4.76968317e-01,
        -1.24472540e+00],
       [ 1.07107087e-01,  1.65212424e+00,  6.77359748e-01,
         4.25528397e-02],
       [-1.20214636e+00, -1.74444691e+00, -5.56717674e-01,
        -1.13702406e-01],
       [-4.94061122e-01, -1.37497207e+00, -1.14197049e-01,
        -2.09172957e-01],
       [ 4.45773889e-01, -7.04514041e-01, -1.53674259e-01,
         1.09211545e+00],
       [-4.48082917e-01,  1.31542886e+00, -1.47939839e+00,
        -9.58917301e-01],
       [-2.09355813e-01, -8.71900747e-01, -9.09076772e-01,
        -2.87919241e-01],
       [ 3.87624872e-01,  1.28825771e+00,  1.35562533e-01,
         7.42507397e-01],
       [-5.46862477e-03,  1.88327004e+00, -6.52387837e-01,
         8.56516877e-02],
       [ 2.50246116e+00,  1.73903293e+00, -5.22370928e-01,
         2.16353876e-01],
       [ 7.99892894e-01,  3.44444913e-01, -8.78853218e-01,
         1.88519254e-01],
       [ 2.97604661e+00,  2.48359865e-01,  1.02920551e+00,
        -8.72139578e-01],
       [-1.99656120e+00,  4.95014887e-01, -2.84103495e-02,
         1.38338610e+00],
       [ 1.20279176e+00, -7.02224132e-01, -1.12250223e+00,
         2.24885448e+00],
       [ 7.15776345e-01,  2.15280819e+00, -7.73995105e-01,
        -8.37387899e-01],
       [-1.20357406e+00, -4.82440434e-01, -2.09022439e+00,
        -1.14932185e-02],
       [-1.68734405e+00, -1.32908226e-01,  1.26626750e+00,
        -5.83261499e-02],
       [ 1.45737956e-02, -6.67207025e-01, -1.53501133e+00,
        -1.78441575e+00],
       [-4.46388317e-02,  6.03175841e-02,  1.17285894e+00,
        -1.00608911e+00],
       [-2.56987699e-01, -2.40789762e+00,  1.14793077e+00,
         1.33028963e+00],
       [-1.58843413e-03,  1.77483204e+00, -6.02376252e-02,
        -8.00741659e-01],
       [-8.08875870e-01, -2.12324023e-01,  1.94132243e+00,
         1.03066892e-01],
       [-6.05211350e-01, -1.46427753e+00,  7.02111302e-01,
        -2.22923652e-01],
       [ 1.57404517e+00,  6.78712208e-01, -1.51002962e+00,
         1.20156962e+00],
       [-1.92493529e+00, -4.13350467e-01, -1.42406662e+00,
        -1.77062680e+00],
       [ 6.59444850e-01,  1.83265300e-01,  2.72053811e-01,
         1.69022281e+00],
       [ 8.96130336e-01,  2.56554343e-01,  3.19927835e+00,
         6.23923840e-01],
       [ 1.14967886e+00,  6.34940637e-01, -5.73441544e-01,
        -3.85047320e-01],
       [-2.45543797e-01,  2.40222433e-01, -1.11994948e+00,
        -5.40750890e-01],
       [-1.58389458e-01,  1.19627995e+00, -2.43727983e-01,
         1.68715281e+00],
       [ 1.12965896e+00,  1.43149022e-01,  4.26721468e-02,
         4.32937280e-01],
       [ 7.64311411e-01, -9.89081735e-01,  1.52329025e+00,
        -1.52618216e+00],
       [ 1.09500282e+00,  2.58434543e-01,  4.13483205e-01,
         1.60146359e+00],
       [-2.03359149e-01,  1.93947739e-01,  2.43383659e-01,
         6.60890643e-01],
       [ 4.51220300e-01,  5.88242402e-01,  3.71099759e-02,
        -5.33700896e-01],
       [ 1.44533749e+00, -4.25299266e-01,  1.24096563e+00,
        -3.29554057e-02],
       [ 4.78726567e-01, -1.17938537e+00,  9.89449929e-01,
        -1.54162692e+00],
       [-2.35478509e-01, -9.46381884e-01, -1.06022281e+00,
        -8.82478781e-01],
       [ 1.40886588e-01,  7.29948865e-01,  9.12681072e-01,
         1.18380907e+00],
       [ 9.72480857e-01,  5.73022903e-01,  7.11926582e-02,
        -7.65133578e-02],
       [-1.73610544e-01,  2.31971154e-01,  5.95947243e-01,
        -7.56357879e-01],
       [ 5.18393093e-02,  3.66071336e-01,  5.62739755e-01,
         1.01697285e+00],
       [ 5.90564684e-01, -6.20378204e-01,  3.28975706e-01,
         7.82950814e-01],
       [ 5.55887503e-02, -2.18269965e+00,  8.87416969e-01,
        -3.13839716e-01],
       [-8.27526946e-01, -5.33326150e-02, -1.60588791e-01,
         6.19815430e-01],
       [-1.36181917e+00, -8.55715950e-01, -1.74295391e-01,
         1.40361113e+00],
       [ 1.02145095e+00, -1.58424928e+00,  1.34782631e-02,
        -1.00891734e+00],
       [-1.38974477e+00,  5.34867186e-01,  6.98031258e-01,
        -2.91467136e-01],
       [ 6.13226976e-01,  3.16821344e-01,  4.99817544e-01,
        -1.08069178e+00],
       [-1.52951818e+00,  7.63104752e-02, -1.59721097e+00,
         5.69867989e-01],
       [ 1.68942891e+00,  1.00672237e+00, -1.42563341e+00,
        -1.01469954e+00],
       [-2.35741754e-01, -1.34457903e+00, -5.06103372e-01,
         4.96817536e-01],
       [ 3.56574586e-01,  6.14695774e-01,  3.00223798e-01,
        -1.04737344e+00],
       [-2.15237991e+00, -1.03303891e+00,  1.28160757e+00,
        -8.93404656e-01],
       [-1.47468512e+00, -5.11026007e-01,  4.83095282e-01,
         1.55491496e+00],
       [ 3.00615406e-01, -2.90733128e-01, -5.82878271e-01,
         4.27807444e-01],
       [-3.09661052e-01,  3.16668646e-01,  1.74841205e+00,
         3.67156085e-03],
       [-6.06176099e-01,  9.41557111e-01,  1.88327865e+00,
        -4.86473523e-01],
       [-3.30225379e-01, -9.80805682e-01, -3.68086129e-01,
         6.76443812e-01],
       [ 1.39772548e+00, -7.71529966e-01,  4.18392416e-01,
         6.20576940e-01],
       [-8.57746360e-01, -1.49381516e+00, -5.33580448e-01,
         1.32238019e+00],
       [ 2.23233201e-01, -1.18323417e+00, -2.65126473e-02,
         1.91888751e-01],
       [ 9.10946105e-01, -6.81100503e-01,  5.96367116e-01,
        -4.77754951e-02],
       [-7.35338470e-01,  1.10963664e+00,  7.59682193e-01,
        -2.22089538e-01],
       [ 1.55306855e-01,  1.99577232e+00, -1.65798041e+00,
        -1.19883339e+00],
       [ 1.07078156e+00, -1.29195231e+00,  5.23912478e-01,
        -3.64947271e-01],
       [ 3.99608290e-01, -4.14879107e-02,  1.19768431e+00,
        -7.43979823e-02],
       [ 8.96371315e-01,  1.42384585e+00,  4.33512540e-01,
         2.07583874e+00],
       [-3.65957866e-01,  4.33232194e-02,  1.50650777e+00,
        -1.17280647e+00],
       [-3.57630957e-01, -8.52806404e-01, -4.12317041e-01,
         5.76744489e-02],
       [ 1.95107953e-02,  5.73246239e-01, -9.51795083e-02,
         1.97768125e-01],
       [-8.42090912e-01,  1.37821011e+00,  2.91744766e+00,
         5.34413580e-01],
       [ 8.69583198e-01, -4.14703776e-01,  1.62508644e+00,
         3.31168639e-01],
       [ 9.20586320e-01, -2.23022641e+00,  3.93762703e-01,
         7.10622953e-01],
       [-3.08708037e-01,  6.98581179e-01,  2.06278765e+00,
        -1.74743914e+00],
       [-1.43051810e+00,  9.56749791e-01, -2.82464186e-01,
        -1.08282494e-01],
       [ 1.15740305e+00, -9.75398360e-01, -2.75383592e+00,
         2.12617018e-01],
       [ 4.17955248e-01, -6.33093982e-01,  1.34862313e+00,
        -2.84305213e-01],
       [-6.36212846e-01, -1.09274769e+00,  6.09261897e-01,
         1.04442120e+00],
       [-3.35068934e-01, -7.49597078e-01, -5.68565586e-01,
         4.40659898e-01],
       [ 1.06415162e+00, -1.89094989e+00, -4.26899325e-01,
        -1.77707845e+00],
       [ 1.18860053e+00,  7.91931176e-01, -2.89960760e-01,
         1.77217378e+00],
       [-1.70132765e+00, -3.48600143e-01, -3.13812042e-01,
        -4.70586122e-01],
       [-7.34596181e-01, -4.44302701e-01,  4.53562887e-03,
         3.39609651e-01],
       [-1.07244030e+00,  1.53907373e+00, -2.94261107e-01,
        -6.41175610e-02]])
In [51]:
cols = ['A','B','C','D']
In [52]:
df1, df2, df3 = pd.DataFrame(data, rng, cols), pd.DataFrame(data, rng, cols), pd.DataFrame(data, rng, cols)
In [53]:
pf = pd.Panel({'df1':df1, 'df2':df2, 'df3':df3}); pf
/home/user/.virtualenvs/crosscompute/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2963: FutureWarning: 
Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  exec(code_obj, self.user_global_ns, self.user_ns)
Out[53]:
<class 'pandas.core.panel.Panel'>
Dimensions: 3 (items) x 100 (major_axis) x 4 (minor_axis)
Items axis: df1 to df3
Major_axis axis: 2013-01-01 00:00:00 to 2013-04-10 00:00:00
Minor_axis axis: A to D
In [54]:
df = pd.DataFrame({'AAA' : [1,2,1,3], 'BBB' : [1,1,2,2], 'CCC' : [2,1,3,1]}); df
Out[54]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC
0 1 1 2
1 2 1 1
2 1 2 3
3 3 2 1
In [55]:
src_cols = df.columns; src_cols
Out[55]:
Index(['AAA', 'BBB', 'CCC'], dtype='object')
In [56]:
new_cols = [str(x) + "_cat" for x in src_cols];new_cols
Out[56]:
['AAA_cat', 'BBB_cat', 'CCC_cat']
In [57]:
categories = {1:'Alpha', 2:'Bravo', 3:'Charlie'}
In [58]:
df[new_cols] = df[src_cols].applymap(categories.get);df
Out[58]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB CCC AAA_cat BBB_cat CCC_cat
0 1 1 2 Alpha Alpha Bravo
1 2 1 1 Bravo Alpha Alpha
2 1 2 3 Alpha Bravo Charlie
3 3 2 1 Charlie Bravo Alpha
In [59]:
df = pd.DataFrame({'AAA' : [100,100,100,200,200,200,300,300,400,500,600], 'BBB' : [200,100,300,400,500,100,200,300,500,200,100]});df
Out[59]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB
0 100 200
1 100 100
2 100 300
3 200 400
4 200 500
5 200 100
6 300 200
7 300 300
8 400 500
9 500 200
10 600 100
In [60]:
df.loc[df.groupby('AAA')['BBB'].idxmin()]
Out[60]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB
1 100 100
5 200 100
6 300 200
8 400 500
9 500 200
10 600 100
In [61]:
df.sort_values(by='BBB').groupby('AAA',as_index=False).first()
Out[61]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
AAA BBB
0 100 100
1 200 100
2 300 200
3 400 500
4 500 200
5 600 100
In [67]:
df = pd.DataFrame({'row':[0,1,2],
                 'One_X':[1.1,1.1,1.1],
                 'One_Y' : [1.2,1.2,1.2],
                 'Two_X' : [1.11,1.11,1.11],
                 'Two_Y' : [1.22,1.22,1.22]});df
Out[67]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
row One_X One_Y Two_X Two_Y
0 0 1.1 1.2 1.11 1.22
1 1 1.1 1.2 1.11 1.22
2 2 1.1 1.2 1.11 1.22
In [68]:
# As Labelled Index
df_li = df.set_index('row');df_li
Out[68]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
One_X One_Y Two_X Two_Y
row
0 1.1 1.2 1.11 1.22
1 1.1 1.2 1.11 1.22
2 1.1 1.2 1.11 1.22
In [69]:
# With Hierarchical Columns
df.columns = pd.MultiIndex.from_tuples([tuple(c.split('_')) for c in df.columns]);df
Out[69]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead tr th { text-align: left; } </style>
row One Two
NaN X Y X Y
0 0 1.1 1.2 1.11 1.22
1 1 1.1 1.2 1.11 1.22
2 2 1.1 1.2 1.11 1.22
In [70]:
# Stack
df_s = df.stack(0); df_s
Out[70]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
X Y
0 One 1.10 1.20
Two 1.11 1.22
1 One 1.10 1.20
Two 1.11 1.22
2 One 1.10 1.20
Two 1.11 1.22
In [72]:
# Stack and reset index
df_r = df.stack(0).reset_index(1); df_r
Out[72]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
level_1 X Y
0 One 1.10 1.20
0 Two 1.11 1.22
1 One 1.10 1.20
1 Two 1.11 1.22
2 One 1.10 1.20
2 Two 1.11 1.22
In [74]:
df_r.columns = ['Sample','All_X','All_Y'];df_r
Out[74]:
<style scoped=""> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
Sample All_X All_Y
0 One 1.10 1.20
0 Two 1.11 1.22
1 One 1.10 1.20
1 Two 1.11 1.22
2 One 1.10 1.20
2 Two 1.11 1.22