import requests

# Download the Wikipedia page listing the Roman emperors.
# requests applies no timeout by default, so one is set explicitly to keep the
# call from hanging forever on a stalled connection; raise_for_status() turns
# an HTTP error response into an exception instead of letting an error page
# be parsed further down.
r = requests.get('https://en.wikipedia.org/wiki/List_of_Roman_emperors', timeout=30)
r.raise_for_status()
r

<Response [200]>

from bs4 import BeautifulSoup
# Parse the downloaded HTML with Python's built-in parser.
soup = BeautifulSoup(markup=r.text, features='html.parser')

# Collect every <td> found inside a <tr> inside a <table>, in document order.
items = soup.select('table tr td')

# How many cells were collected?
len(items)

994

# Peek at the cell at index 6; the pasted output below shows a reign length.
items[6]

<td>40 years, 7 months and 3 days</td>

# Seven cells further on; the pasted output below shows the next reign
# length, which suggests the table has seven cells per row.
items[6 + 7]

<td>22 years, 5 months and 27 days</td>

# Take every seventh cell starting at index 6, i.e. exactly the indices i
# for which (i + 1) % 7 == 0.
dates = items[6::7]

# Keep only the first 86 of those cells.
dates = dates[:86]

# Preview the first ten.
dates[:10]

[<td>40 years, 7 months and 3 days</td>,
 <td>22 years, 5 months and 27 days</td>,
 <td>3 years, 10 months and 6 days</td>,
 <td>13 years, 8 months and 18/19 days</td>,
 <td>13 years, 7 months and 27 days</td>,
 <td>7 months and 7 days</td>,
 <td>3 months and 1 day (91 days)</td>,
 <td>8 months and 3 days</td>,
 <td>9 years, 6 months and 3 days</td>,
 <td>2 years, 2 months and 20 days</td>]

# Reduce each <td> element to its text content. The loop variable is named
# `cell` so it does not reuse the name of the global `items` list.
dates = [cell.get_text() for cell in dates]

# Preview the first ten strings.
dates[:10]

['40 years, 7 months and 3 days',
 '22 years, 5 months and 27 days',
 '3 years, 10 months and 6 days',
 '13 years, 8 months and 18/19 days',
 '13 years, 7 months and 27 days',
 '7 months and 7 days',
 '3 months and 1 day (91 days)',
 '8 months and 3 days',
 '9 years, 6 months and 3 days',
 '2 years, 2 months and 20 days']

# Take every seventh cell starting at index 1, i.e. exactly the indices i
# for which (i + 6) % 7 == 0.
names = items[1::7]

# Keep only the first 86 of those cells.
names = names[:86]

# Preview the first ten.
names[:10]

[<td><b><a href="/wiki/Augustus" title="Augustus">Augustus</a></b><br/>
 <small>IMPERATOR CAESAR DIVI FILIVS <b>AVGVSTVS</b></small></td>,
 <td><b><a href="/wiki/Tiberius" title="Tiberius">Tiberius</a></b><br/>
 <small><b>TIBERIVS</b> IVLIVS CAESAR AVGVSTVS</small></td>,
 <td><b><a href="/wiki/Caligula" title="Caligula">Caligula</a></b><br/>
 <small><b>GAIVS</b> IVLIVS CAESAR AVGVSTVS GERMANICVS</small></td>,
 <td><b><a href="/wiki/Claudius" title="Claudius">Claudius</a></b><br/>
 <small>TIBERIVS <b>CLAVDIVS</b> CAESAR AVGVSTVS GERMANICVS</small></td>,
 <td><b><a href="/wiki/Nero" title="Nero">Nero</a></b><br/>
 <small><b>NERO</b> CLAVDIVS CAESAR AVGVSTVS GERMANICVS</small></td>,
 <td><b><a href="/wiki/Galba" title="Galba">Galba</a></b><br/>
 <small>SERVIVS SVLPICIVS <b>GALBA</b> CAESAR AVGVSTVS</small></td>,
 <td><b><a href="/wiki/Otho" title="Otho">Otho</a></b><br/>
 <small>MARCVS SALVIVS <b>OTHO</b> CAESAR AVGVSTVS</small></td>,
 <td><b><a href="/wiki/Vitellius" title="Vitellius">Vitellius</a></b><br/>
 <small>AVLVS <b>VITELLIVS</b> GERMANICVS AVGVSTVS</small></td>,
 <td><b><a href="/wiki/Vespasian" title="Vespasian">Vespasian</a></b><br/>
 <small>TITVS FLAVIVS CAESAR <b>VESPASIANVS</b> AVGVSTVS</small></td>,
 <td><b><a href="/wiki/Titus" title="Titus">Titus</a></b><br/>
 <small><b>TITVS</b> FLAVIVS CAESAR VESPASIANVS AVGVSTVS</small></td>]

# Each name cell is reduced to the text of the first <a> element inside it.
names = [cell.find('a').get_text() for cell in names]

# Preview the first ten names.
names[:10]

['Augustus',
 'Tiberius',
 'Caligula',
 'Claudius',
 'Nero',
 'Galba',
 'Otho',
 'Vitellius',
 'Vespasian',
 'Titus']

import pandas as pd

# Pair each emperor name with its reign-length string, one row per pair.
df = pd.DataFrame(
    list(zip(names, dates)),
    columns=['Emperor name', 'Years in office'],
)

# Show the first twelve rows.
df.head(12)

# (rows, columns)
df.shape

(86, 2)

# Drop the rows whose reign length contains 'Unknown'.
# The explicit .copy() makes the result an independent DataFrame, so adding
# columns to it later does not trigger pandas' SettingWithCopyWarning.
# The original index labels are kept (no reset), so a dropped row leaves a gap.
df = df[~df['Years in office'].str.contains('Unknown')].copy()

# (rows, columns) after filtering
df.shape

(85, 2)

import re

# Patterns for the individual components of a reign-length string such as
# '13 years, 8 months and 18/19 days'. Raw strings are used so that '\d' is
# passed to the regex engine untouched rather than being read as a string
# escape sequence.
p_years = re.compile(r'(\d+) year[s]?')

p_months = re.compile(r'(\d+) month[s]?')

p_days = re.compile(r'(\d+) day[s]?')

def to_timestamp(s, days_per_year=365, days_per_month=30.5):
    """Convert a reign-length string into a ``pd.Timedelta``.

    The years, months and days components found in *s* are each converted to
    days and added together. Components that are absent contribute nothing.
    For each component only the first regex match is used, so in
    '3 months and 1 day (91 days)' the days component is 1.

    Despite its name, the function returns a duration, not a timestamp.

    Args:
        s: Text such as '40 years, 7 months and 3 days'.
        days_per_year: Number of days counted for one year (approximation).
        days_per_month: Number of days counted for one month (approximation).

    Returns:
        A ``pd.Timedelta`` equal to the sum of all components found.

    Raises:
        ValueError: If *s* contains no years, months or days component.
    """
    components = (
        (p_years, days_per_year),
        (p_months, days_per_month),
        (p_days, 1),
    )

    total_days = 0
    matched = False
    for pattern, days_per_unit in components:
        m = pattern.search(s)
        if m:
            total_days += int(m.group(1)) * days_per_unit
            matched = True

    if not matched:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('no years/months/days component found in %r' % (s,))

    return pd.Timedelta(value=total_days, unit='D')

# Convert every reign-length string into a duration.
df['Reign duration'] = df['Years in office'].map(to_timestamp)

# Show the first rows with the new column.
df.head()

# Express each duration in years: whole days divided by 365.25.
df['Reign duration (years)'] = df['Reign duration'].dt.days / 365.25

%matplotlib inline

import matplotlib.pyplot as plt

# Use matplotlib's 'bmh' style sheet for the plots that follow.
plt.style.use('bmh')

# One wide figure; one bar per emperor showing the reign length in years.
fig, ax = plt.subplots(figsize=(15, 5))
df.plot.bar(x='Emperor name', y='Reign duration (years)', ax=ax)

<matplotlib.axes._subplots.AxesSubplot at 0x8e82550>

# Histogram of the reign lengths in years, grouped into 5 bins.
df.hist(column='Reign duration (years)', bins=5)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x0000000009109DA0>]], dtype=object)

# Cumulative histogram of the reign lengths in years, using 50 bins.
# `cumulative` is passed as the boolean True; the string 'True' only worked
# because any non-empty string is truthy (so would the string 'False').
df.hist(column='Reign duration (years)', cumulative=True, bins=50)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000A9DC940>]], dtype=object)

# Running total of the reign lengths (in years), in row order.
cumsum = df['Reign duration (years)'].cumsum()

# Inspect the index labels, which are carried over from df.
cumsum.index.values

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85], dtype=int64)

# x: years elapsed at the end of each reign; y: how many emperors have
# reigned by then. The count runs 1..len(cumsum); the DataFrame index labels
# are not used because they start at 0 and skip any dropped row.
plt.plot(cumsum, list(range(1, len(cumsum) + 1)))
plt.ylabel('number of emperors that have reigned over the empire')
plt.xlabel('duration of empire (years)')

<matplotlib.text.Text at 0xaa9c6d8>

# Same cumulative plot on a wider figure, annotated with historical periods.
plt.figure(figsize=(10, 5))
# y is the running count of emperors (1..len(cumsum)); the DataFrame index
# labels are not used because they start at 0 and skip any dropped row.
plt.plot(cumsum, list(range(1, len(cumsum) + 1)))
plt.ylabel('number of emperors that have reigned over the empire')
plt.xlabel('duration of the Roman empire (years)')
# Labels are placed at hand-picked (x, y) data coordinates.
plt.text(20, 17, 'Pax Romana', fontsize=20)
plt.text(230, 35, 'Third century crisis', fontsize=12)
plt.text(450, 50, 'Renewed stability: the Dominate', fontsize=20)
plt.text(600, 85, 'Fall of the western empire', fontsize=12)

<matplotlib.text.Text at 0x5c6c240>

Roman Emperors of the West: Plots of their Rise and Fall

Getting the data

Visualizing the data

Comments

	Emperor name	Years in office
0	Augustus	40 years, 7 months and 3 days
1	Tiberius	22 years, 5 months and 27 days
2	Caligula	3 years, 10 months and 6 days
3	Claudius	13 years, 8 months and 18/19 days
4	Nero	13 years, 7 months and 27 days
5	Galba	7 months and 7 days
6	Otho	3 months and 1 day (91 days)
7	Vitellius	8 months and 3 days
8	Vespasian	9 years, 6 months and 3 days
9	Titus	2 years, 2 months and 20 days
10	Domitian	15 years and 4 days
11	Nerva	1 year, 4 months and 9 days

	Emperor name	Years in office	Reign duration
0	Augustus	40 years, 7 months and 3 days	14600 days
1	Tiberius	22 years, 5 months and 27 days	8030 days
2	Caligula	3 years, 10 months and 6 days	1095 days
3	Claudius	13 years, 8 months and 18/19 days	4745 days
4	Nero	13 years, 7 months and 27 days	4745 days