# Introduction to Computational Analysis

 Pay Notebook Creator: Roy Hyunjin Han 0 Set Container: Numerical CPU with TINY Memory for 10 Minutes 0 Total 0

# Pandas aligns labelled data

In [1]:
import numpy as np
from pandas import Series, DataFrame, Panel

## Series is a list with labelled items

In [2]:
# Each Series pairs a year label (the index) with that month's organic spinach price.
februaryOrganicSpinachPrices = Series(
    [8.02, 7.63, 8.03], index=[2004, 2005, 2006])
novemberOrganicSpinachPrices = Series(
    [5.70, 5.23, 11.63], index=[2004, 2005, 2006])
# A bare trailing expression is displayed as the cell's output.
novemberOrganicSpinachPrices
In [3]:
# Look up one price by its year label.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(novemberOrganicSpinachPrices[2004])

## DataFrame is a table with labelled columns and rows

In [4]:
# Conventional prices: one column per month, rows labelled by year.
conventionalSpinachPrices = DataFrame({
    'february': Series({2004: 3.42, 2005: 4.03, 2006: 4.13}),
    'november': Series({2004: 3.58, 2005: 4.19, 2006: 4.16}),
})

# Organic prices reuse the two Series built earlier in the notebook.
organicSpinachPrices = DataFrame({
    'february': februaryOrganicSpinachPrices,
    'november': novemberOrganicSpinachPrices,
})

# Display the organic table as the cell's output.
organicSpinachPrices
In [5]:
# Index the DataFrame by column label; the trailing expression is displayed as the cell's output.
organicSpinachPrices['november']
In [6]:
# Chain a column label and then a row (year) label to reach a single value.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(organicSpinachPrices['november'][2004])
In [7]:
# Show the first two rows by position. A plain [:2] slice on a DataFrame is
# positional too; .iloc states that explicitly so it is not mistaken for a
# lookup of year labels.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(organicSpinachPrices.iloc[:2])

## Panel is a collection of labelled tables

In [8]:
# Bundle the two price tables into one Panel, keyed by farming method.
# NOTE(review): Panel was deprecated in pandas 0.20 and removed in 0.25, so this
# cell needs an older pandas - confirm the target environment.
spinachPrices = Panel({
    'conventional': conventionalSpinachPrices,
    'organic': organicSpinachPrices,
})
# Display the Panel summary as the cell's output.
spinachPrices
In [9]:
# Index the Panel by item label to get back the table stored under 'organic'.
spinachPrices['organic']
In [10]:
# Item label first, then column label: the 'november' column of the 'organic' table.
spinachPrices['organic']['november']
In [11]:
# Item label, column label, then row (year) label: drill down to a single value.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(spinachPrices['organic']['november'][2004])

Beware of using `len()` to find the number of rows in a DataFrame inside a Panel. The Panel pads non-matching rows and columns across its DataFrames with NaN, and `len()` counts those padded rows too. Instead, use `DataFrame.count()`, which counts only the non-NaN values in each column.

## Reindexing

In [12]:
# 3x3 table holding 1..9 row by row; rows are labelled x, y, z and columns a, b, c.
# np.arange replaces the Python-2-only xrange, so the cell runs on Python 2 and 3.
df = DataFrame(
    np.arange(1, 10).reshape(3, 3),
    index=['x', 'y', 'z'],
    columns=['a', 'b', 'c'])
# cumsum() accumulates down each column by default.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(df.cumsum())
In [13]:
# Reorder the rows and columns by label; the data moves with its labels.
df1 = df.reindex(index=['z', 'y', 'x'], columns=['c', 'a', 'b'])
# Display the reordered table as the cell's output.
df1
In [14]:
# A second 3x3 table of 1..9, this time labelled z, y, x by row and c, b, a by column.
df2 = DataFrame(
    np.arange(1, 10).reshape(3, 3),
    index=['z', 'y', 'x'],
    columns=['c', 'b', 'a'])
# Display the table as the cell's output.
df2
In [15]:
# Reindex df2 so that its row and column labels follow df1's; the result is displayed, not stored.
df2.reindex_like(df1)

## Aggregation

In [16]:
df = DataFrame(dict(