web-scraping with BeautifulSoup


Pay Notebook Creator: Salah Ahmed0
Set Session Lifetime: 10 minutes0
Total0

Using Python to scrape article titles from PubMed search results

{query: query ? keyword to search for on pubmed}

In [1]:
# Crosscompute
# Input parameters for this notebook. NOTE(review): the marker comment above
# presumably flags this cell as the tool's argument cell for CrossCompute —
# confirm before renaming or reformatting anything here.
# Keyword to search for on PubMed; also embedded in the output CSV filename.
query = 'hypnosis'
# Folder the output CSV is written into ('.' is the current working directory).
target_folder = '.'
In [2]:
import requests
from os.path import join
from bs4 import BeautifulSoup
from pandas import DataFrame
In [3]:
# Base address of the PubMed search page that will be requested.
url = 'https://www.ncbi.nlm.nih.gov/pubmed/'
# Output CSV lives in target_folder and is named after the search keyword,
# e.g. 'titles-hypnosis.csv'.
csv_name = 'titles-%s.csv' % query
path = join(target_folder, csv_name)
In [4]:
# Fetch the search results page, sending the keyword as the "term" URL parameter.
# The value must be the `query` variable itself; the quoted string 'query' would
# search for the literal word "query" regardless of the configured keyword.
# A timeout keeps the notebook from hanging forever on an unresponsive server.
r = requests.get(url, params={'term': query}, timeout=30)
# Stop with an HTTPError on a 4xx/5xx reply instead of scraping an error page.
r.raise_for_status()
# Parse the returned HTML with the lxml parser.
soup = BeautifulSoup(r.content, 'lxml')
# Article titles are the text of every element whose CSS class is "title".
# (find_all is the current name of the legacy findAll alias.)
tags = soup.find_all(class_='title')
titles = [t.text for t in tags]
# Build a one-column table of titles and write it to `path` as CSV,
# omitting the DataFrame index column.
df = DataFrame(titles, columns=['Title of Article'])
df.to_csv(path, index=False)
In [5]:
# Report where the titles table was saved, as a "name = value" line on stdout.
print('titles_table_path = %s' % (path,))
titles_table_path = ./titles-hypnosis.csv