# Introduction to Computational Analysis

 Pay Notebook Creator: Roy Hyunjin Han 0 Set Container: Numerical CPU with TINY Memory for 10 Minutes 0 Total 0
In [1]:
"""

'Agency',
'Agency Name',
'Complaint Type',
'Borough',
'X Coordinate (State Plane)',
'Y Coordinate (State Plane)',
]].sort()
store = HDFStore('311-20111030-20111105.h5')
"""

In [2]:
from pandas import HDFStore

# Open the dataset read-only: HDFStore's default append mode would silently
# create an empty file if the path were wrong, and this analysis never writes
# to the store.
store = HDFStore('datasets/NYC-311-ServiceRequests.h5', mode='r')
try:
    # Read the table stored under the 'issues' key into memory.
    issues = store['issues']
finally:
    # Release the file handle; the visible cells below only use `issues`.
    store.close()

In [3]:
# Show the first record. `.iloc` makes the positional lookup explicit; `.ix`
# guessed between label and position and has been removed from pandas.
issues.iloc[0]

In [4]:
# How many 311 issues were reported that week?
# The first entry of the frame's shape is its row count.
issues.shape[0]

In [5]:
# How many issues were reported on Halloween?
# Partial-string lookup of a single day; `.loc` replaces the removed `.ix`.
# NOTE(review): assumes `issues` has a datetime-like index, as the resample
# call elsewhere in this notebook also requires.
len(issues.loc['2011-10-31'])

In [6]:
# What were the top five categories reported that week?
# value_counts() orders the categories from most to least frequent.
complaint_counts = issues['Complaint Type'].value_counts()
complaint_counts.head(5)

In [7]:
# What was the daily distribution of issues?
# Bucket the rows by calendar day and count them. The `how=` keyword was
# removed from resample(); `.size()` counts every row in a bucket, as `len` did.
issues['Complaint Type'].resample('D').size().plot();

In [8]:
# How did the categorical distribution of issues differ between Brooklyn and the Bronx?
def get_borough_counts(borough):
    """Return the number of issues per complaint type within `borough`."""
    in_borough = issues[issues.Borough == borough]
    return in_borough['Complaint Type'].value_counts()


brooklyn = get_borough_counts('BROOKLYN')
bronx = get_borough_counts('BRONX')
# Positive values are complaint types more frequent in Brooklyn, negative ones
# more frequent in the Bronx. fill_value=0 keeps types seen in only one borough.
# sort_values() replaces the removed Series.order(); both sort ascending.
difference = brooklyn.sub(bronx, fill_value=0).sort_values()
# The call form of print behaves the same on Python 2 and Python 3.
print(difference.head(3))
print('')
print(difference.tail(3))

In [9]:
# How many issues were reported in each borough?
# count() tallies the non-null 'Complaint Type' entries of every borough group.
complaints_by_borough = issues.groupby('Borough')['Complaint Type']
complaints_by_borough.count()

In [10]:
# What was the spatial distribution of complaints?
coordinate_columns = [
    'X Coordinate (State Plane)',
    'Y Coordinate (State Plane)',
]
# Swap the original index for consecutive integers, discard issues that lack a
# coordinate, then shrink the remaining coordinates by a factor of 500.
points = issues[coordinate_columns].reset_index(drop=True).dropna() / 500

In [11]:
# Bounding box of the scaled points. The unpacking relies on the X column
# preceding the Y column in `points`.
lower_corner = points.min().values
upper_corner = points.max().values
minX, minY = lower_corner
maxX, maxY = upper_corner

In [12]:
# Rasterise the complaint locations into a smoothed density image.
import numpy as np  # was only reachable through an implicit pylab namespace
import pylab as pl
from scipy.ndimage import gaussian_filter

# `points` and the bounds are floats (the coordinates were divided by 500), so
# the grid size and the cell indices are truncated to integers explicitly.
height = int(maxY - minY) + 1
width = int(maxX - minX) + 1
image = np.zeros((height, width))

# Row index comes from Y and column index from X, each measured from the minimum.
rows = (points['Y Coordinate (State Plane)'].values - minY).astype(int)
cols = (points['X Coordinate (State Plane)'].values - minX).astype(int)

# np.add.at accumulates repeated (row, col) pairs, so every complaint adds one
# to its cell even when several complaints share a cell.
np.add.at(image, (rows, cols), 1)

# Blur with a Gaussian of sigma 1 along both axes.
image = gaussian_filter(image, (1, 1))
# origin='lower' puts row 0 (the smallest Y) at the bottom of the plot.
pl.imshow(image, origin='lower');