from IPython.display import IFrame

# Embed the media.ccc.de oEmbed player for the linked recording at 1024x576.
talk_embed_url = "https://media.ccc.de/v/emf2016-269-beer-for-everyone/oembed"
IFrame(talk_embed_url, width=1024, height=576)

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the paginated recipe index on brewdogrecipes.com (pages p0..p13) and
# collect every recipe's display name and URL into `recipe_df`.
dfs = []
for index in range(14):
    index_url = "https://brewdogrecipes.com/p{}".format(index)
    # Without a timeout a stalled connection would hang the run forever, and
    # raise_for_status() keeps us from parsing an HTTP error page as a listing.
    r = requests.get(index_url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    recipe_list = soup.find('div', class_='recipe-list')
    if recipe_list is None:
        # Fail with the offending URL rather than an AttributeError on None.
        raise ValueError('no recipe list found at url: {}'.format(index_url))
    recipes = recipe_list.find_all('article')
    recipe_urls = [recipe.find('a').attrs['href'] for recipe in recipes]
    recipe_names = [recipe.find('h2').text for recipe in recipes]
    dfs.append(pd.DataFrame({'names': recipe_names, 'urls': recipe_urls}))

# Every per-page frame is indexed from 0; renumber so row labels are unique.
df = pd.concat(dfs, ignore_index=True)

# NOTE(review): presumably some index pages overlap (e.g. p0 and p1 serving the
# same listing), hence the de-duplication -- confirm against the site.
recipe_df = df.drop_duplicates().reset_index(drop=True)

recipe_df.describe()

import tqdm

# Visit every recipe page and read the brewing parameters from the table cells
# of its 'METHOD/TIMINGS' panel. Recipes whose page lacks the panel or has too
# few cells are reported and skipped.
series = []
for recipe_url, name in zip(tqdm.tqdm(recipe_df.urls), recipe_df.names):
    # A timeout keeps one stalled request from blocking the whole crawl.
    r = requests.get(recipe_url, timeout=30)
    soup = BeautifulSoup(r.text, 'html.parser')
    # Keep the panels that contain an element holding an <h3> titled 'METHOD/TIMINGS'.
    panels = [
        item for item in soup.find_all('div', class_='panel panel-brewdog')
        if item.find(lambda tag: tag.find('h3') and tag.find('h3').text == 'METHOD/TIMINGS') is not None
    ]
    try:
        # The panel lookup is inside the try so a page without the panel is
        # handled like a page with too few cells instead of aborting the loop.
        items = [item.text for item in panels[0].find_all('td')]
        # NOTE(review): items[1] is skipped -- presumably the mash temperature
        # in another unit; verify against a recipe page.
        values = {'beer_name': name, 'mash_temp': items[0], 'mash_time': items[2], 'fermentation_temp': items[3]}
    except IndexError:
        print('could not retrieve data for url: {}'.format(recipe_url))
        # Skip this recipe: appending here would re-add the previous recipe's
        # `values` (or raise NameError if the very first recipe fails).
        continue
    series.append(pd.Series(values))

df_parameters = pd.DataFrame(series)

 21%|█████████████████▏                                                               | 53/249 [00:14<00:53,  3.69it/s]

could not retrieve data for url: https://brewdogrecipes.com/recipes/bramling-x

 35%|████████████████████████████▋                                                    | 88/249 [00:23<00:41,  3.88it/s]

could not retrieve data for url: https://brewdogrecipes.com/recipes/ab-19

 82%|█████████████████████████████████████████████████████████████████▏              | 203/249 [00:55<00:12,  3.66it/s]

could not retrieve data for url: https://brewdogrecipes.com/recipes/no-label

100%|████████████████████████████████████████████████████████████████████████████████| 249/249 [01:07<00:00,  3.70it/s]

df_parameters.head()

# Work on a copy so the raw scraped strings in df_parameters stay untouched.
df = df_parameters.copy()

# For each column, capture the text in front of its unit suffix and convert
# that captured text to float.
unit_patterns = (
    ('mash_temp', '(.*:?)°C'),
    ('mash_time', '(.*:?)mins'),
    ('fermentation_temp', '(.*:?)°C'),
)
for column, pattern in unit_patterns:
    df[column] = df[column].str.extract(pattern).astype(float)

df.head()

import altair as alt

# One point chart per parameter (values along the x axis only), combined
# with altair's `&` operator.
point_charts = [
    alt.Chart(df).mark_point().encode(x=column)
    for column in ('mash_temp', 'mash_time', 'fermentation_temp')
]
point_charts[0] & point_charts[1] & point_charts[2]

# One binned bar chart (record count per bin) per parameter, combined with
# altair's `&` operator.
histograms = [
    alt.Chart(df).mark_bar().encode(
        alt.X("{}:Q".format(column), bin=True),
        y='count()',
    )
    for column in ('mash_temp', 'mash_time', 'fermentation_temp')
]
histograms[0] & histograms[1] & histograms[2]

# Mash time against mash temperature, one point per row of df; the tooltip
# lists the beer name and all three parameters.
tooltip_fields = ['beer_name', 'mash_temp', 'mash_time', 'fermentation_temp']
alt.Chart(df).mark_point().encode(
    alt.X('mash_temp'),
    alt.Y('mash_time'),
    tooltip=tooltip_fields,
)

# Keep only the 'mean' row of the summary statistics, shown as a one-column frame.
summary = df.describe()
summary.loc['mean'].to_frame()

	beer_name	mash_temp	mash_time	fermentation_temp
0	#1 PUNK IPA 2007 - 2010	65°C	75mins	19°C
1	#2 PUNK IPA 2010 - CURRENT	66°C	75mins	19°C
2	#3 THE PHYSICS	65°C	75mins	19°C
3	#4 RIPTIDE	65°C	75mins	20°C
4	#5 HOP ROCKER	65°C	75mins	10°C

	beer_name	mash_temp	mash_time	fermentation_temp
0	#1 PUNK IPA 2007 - 2010	65.0	75.0	19.0
1	#2 PUNK IPA 2010 - CURRENT	66.0	75.0	19.0
2	#3 THE PHYSICS	65.0	75.0	19.0
3	#4 RIPTIDE	65.0	75.0	20.0
4	#5 HOP ROCKER	65.0	75.0	10.0

BrewDog Beer Statistics

Scraping the data

Plotting the data

Comments

	names	urls
count	249	249
unique	249	249
top	#244 AB:21	https://brewdogrecipes.com/recipes/b-sides-whi...
freq	1	1

	mean
mash_temp	64.405622
mash_time	72.610442
fermentation_temp	23.151020