# Introduction to Computational Analysis

Notebook creator: Roy Hyunjin Han. Container: Numerical CPU with TINY memory for 10 minutes.
In [31]:
import numpy as np
import pylab as pl

from scripts import make_users
# Two synthetic user sets with different age mixes; the four age-group
# counts sum to 1000 in each call.
users1 = make_users(
    teenCount=250,
    twentyCount=500,
    thirtyCount=150,
    fortyCount=100,
)
users2 = make_users(
    teenCount=500,
    twentyCount=400,
    thirtyCount=90,
    fortyCount=10,
)


# Market segmentation

Trevor runs a news website. He wants to identify the demographics of readers for each article in order to show targeted ads. For each user, he has the following data:

• Age
• Gender
• Device (desktop, laptop, cell phone, smartphone, tablet)
• Location

## Explore dataset

In [32]:
# Look at the characteristics of the first user.
# Materialize the pairs with list() so the cell displays the
# (feature name, value) tuples rather than an opaque zip object on Python 3.
list(zip(users1.feature_names, users1.data[0]))


## Visualize dataset

In [33]:
# Scatterplot age (column 0) against device (column 2).
# Axis labels are added so the figure can be read on its own.
pl.scatter(users1.data[:, 0], users1.data[:, 2])
pl.xlabel('Age')
pl.ylabel('Device');

In [34]:
# Scatterplot the two location columns (3 and 4) with axis ticks hidden
location_x, location_y = users1.data[:, 3], users1.data[:, 4]
pl.scatter(location_x, location_y)
pl.xticks([]), pl.yticks([]);

In [35]:
import pylab as pl

def plot_embedding(X):
    """Scatterplot the first two columns of X after min-max scaling.

    Parameters
    ----------
    X : array-like
        Two-dimensional array with at least two columns; columns 0 and 1
        are used as the x and y coordinates.

    Every column is rescaled to the [0, 1] range before plotting. A column
    whose values are all equal (zero range) is mapped to 0 rather than
    producing NaN from a 0/0 division. Axis ticks are hidden.
    """
    X = np.asarray(X, dtype=float)
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    x_range = x_max - x_min
    # Avoid dividing by zero for constant columns
    x_range = np.where(x_range == 0, 1, x_range)
    X = (X - x_min) / x_range
    pl.scatter(X[:, 0], X[:, 1])
    pl.xticks([])
    pl.yticks([])

from sklearn import manifold
# Embedding model used to project the user features down to two dimensions
visualization = manifold.LocallyLinearEmbedding(
    n_neighbors=30,
    n_components=2,
    method='standard',
)

In [36]:
# Visualize the unscaled data: embed first, then plot the embedding
embedding_raw = visualization.fit_transform(users1.data)
plot_embedding(embedding_raw)

In [37]:
# Visualize the data again after passing it through sklearn's scale()
from sklearn.preprocessing import scale
users1_scaled = scale(users1.data)
plot_embedding(visualization.fit_transform(users1_scaled))


## Cluster users

In [38]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Fit the scaler on users1 and keep it, so that new users can later be
# transformed with the same scaling before prediction.
scaler = StandardScaler().fit(users1.data)
users1DataScaled = scaler.transform(users1.data)

# Group the scaled users into four clusters.
# random_state is fixed so the cluster labels are reproducible between runs;
# without it, k-means initialization differs on every execution.
cluster = KMeans(init='k-means++', n_clusters=4, n_init=10, random_state=0)
cluster.fit(users1DataScaled)

In [39]:
# Look at the characteristics of the first user.
# list() makes the (feature name, value) pairs display on Python 3,
# where a bare zip() only shows an iterator object.
list(zip(users1.feature_names, users1.data[0]))

In [40]:
# Get the category of the first user.
# scikit-learn estimators expect 2-D input (n_samples, n_features), so the
# single 1-D sample is reshaped into one row before scaling and predicting.
cluster.predict(scaler.transform(users1.data[0].reshape(1, -1)))

In [41]:
# Get the category of a similar user.
# The sample is written as a 2-D array with a single row because
# scikit-learn estimators expect input of shape (n_samples, n_features).
cluster.predict(scaler.transform(np.array([[10., 0., 4., 5., -10.]])))