# Leaves Outlook

 Pay Notebook Creator: Naiem Gafar 0 Set Container: Numerical CPU with TINY Memory for 10 Minutes 0 Total 0
In [5]:
# This set of code was used to create a final dataframe with three columns:
# Species
# Type (coniferous or deciduous or None)
# Link for that species

# Example of final output:
# 7,Acer pseudoplatanus,deciduous,https://en.wikipedia.org/wiki?curid=437919

# The final results were then saved as a new csv file

# As a team, we decided that it would be faster / more efficient to do this process just once

# We would then use this newly created csv file in our main program
# so that we don't have to call the Wikipedia API every time
# this saves a lot of time!

In [6]:
import pandas as pd
import wikipedia # Used to access/ work with wikipedia API
import datetime

# Helper for Function # 1: finds the first tree-type keyword in a piece of text
def _findTreeType(text):
    """Return the first tree-type keyword contained in text, or None if there is none."""
    # Order matters: 'deciduous' wins over 'coniferous', which wins over 'evergreen'
    for treeType in ('deciduous', 'coniferous', 'evergreen'):
        if treeType in text:
            return treeType
    return None


# Function # 1 that takes in a species name as a parameter and returns its type
# (deciduous, coniferous or evergreen)
def LookInSummary(treeSpeciesInLatin):
    """Classify a tree species from the text of its Wikipedia page.

    The page summary is searched first. If it contains none of the keywords,
    the page's "Description" section is searched instead.

    Parameters:
        treeSpeciesInLatin: the title to look up on Wikipedia (the species'
            Latin name).

    Returns:
        'deciduous', 'coniferous' or 'evergreen' when a keyword is found.
        None when the lookup is ambiguous, when no page is found, or when the
        page has no "Description" section.
        The string 'None' when the "Description" section exists but contains
        no keyword (kept as a string so existing consumers see the same value).
    """
    # Pause at least 10 milliseconds between requests. timedelta's positional
    # arguments are (days, seconds, microseconds), so the unit is named explicitly.
    # NOTE(review): this is re-applied on every call; consider hoisting it to module level.
    wikipedia.set_rate_limiting(rate_limit=True, min_wait=datetime.timedelta(milliseconds=10))

    # If a lookup finds nothing OR multiple results, we just return None
    lookupErrors = (wikipedia.exceptions.DisambiguationError,
                    wikipedia.exceptions.PageError)

    # First get the summary from that species' wikipedia page
    try:
        summary = wikipedia.summary(treeSpeciesInLatin)
    except lookupErrors:
        return None
    if summary is None:
        return None

    # If a keyword is in the summary, then that is the tree's type
    treeType = _findTreeType(summary)
    if treeType is not None:
        return treeType

    # If it can't be found in the summary, then it might be in the
    # "Description" section of the page. This lookup can fail the same
    # way the summary lookup can, so it gets the same handling.
    try:
        page = wikipedia.page(treeSpeciesInLatin)
    except lookupErrors:
        return None
    if page is None:
        return None

    description = page.section("Description")
    if description is None:
        return None

    # Search the description itself (not the summary, which was already searched)
    treeType = _findTreeType(description)
    if treeType is not None:
        return treeType

    # Nothing found anywhere: the string 'None' (not the None object) is returned here
    return 'None'

# Function # 2 that takes in a species name as a parameter and returns the URL of its wikipedia page
# NOTE(review): the `def` header line for this function is not present in this export and the
# body's indentation has been flattened -- restore both before running.
# Look up the page for the species; return None when the lookup is ambiguous or no page is found
try:
webpage = wikipedia.page(treeSpeciesInLatin)
except wikipedia.exceptions.DisambiguationError as e:
return (None)
except wikipedia.exceptions.PageError as e:
return (None)
# NOTE(review): despite its name, URL holds the page id here, not a full URL.
# The concatenation below assumes pageid is a str -- TODO confirm.
URL = webpage.pageid
# Build the link from the page id (same format as the example output: ...wiki?curid=437919)
return ('https://en.wikipedia.org/wiki?curid='+URL)

In [7]:
# Read the original CSV file, keep only its 'spc_latin' column, and collect
# every distinct species name that appears in it
listOfAllTreeSpecies = pd.read_csv('Copy of 2015StreetTreesCensus_TREES.csv')['spc_latin'].unique()

# Start the output dataframe with a single 'Species' column, one row per unique species
df = pd.DataFrame({'Species': listOfAllTreeSpecies})

# 'Type' column: the result of Function # 1 (LookInSummary) for each species, in the same order
df['Type'] = [LookInSummary(speciesName) for speciesName in listOfAllTreeSpecies]

# Any blank cell is filled with the placeholder word 'Obama' (used instead of 'None')
df = df.fillna('Obama')

# Add a new column to the dataframe named 'URL Links'
# Each value in this column is the Wikipedia Link for that species
# To get the link, we used Function # 2 from above