from bs4 import BeautifulSoup
import requests

# Download the landing page of the results site and parse its HTML.
index_response = requests.get('http://elections.interieur.gouv.fr/presidentielle-2017/')
soup = BeautifulSoup(index_response.text, 'html.parser')

# The page holds a <map> element whose <area> children each carry an href.
map_tag = soup.find('map')
area_tags = map_tag.find_all('area')

# Gather every href, then deduplicate them.
links = [area.attrs['href'] for area in area_tags]
unique_links = set(links)

def _parse_count(text):
    """Convert a displayed count such as '436 941' to an int.

    Every whitespace character is removed first, so ordinary spaces as well
    as non-breaking spaces used as thousands separators are accepted.
    """
    return int("".join(text.split()))


def extract_candidate_votes(url, timeout=30):
    """Extract the vote counts published on one results page.

    Parameters
    ----------
    url : str
        Address of the results page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up; forwarded to
        ``requests.get``.

    Returns
    -------
    tuple
        ``(dept, results)``: ``dept`` is the text of the last link found in
        the 'pub-fil-ariane' element, and ``results`` maps every row label of
        the first table, plus the 'Blancs', 'Nuls' and 'Abstentions' rows of
        the second table, to an integer count.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If the page contains fewer than two ``<tbody>`` elements.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    # département name: last link of the 'fil d'Ariane' (breadcrumb) element
    dept = soup.find(class_='span12 pub-fil-ariane').find_all('a')[-1].text
    # Look the tables up once: the first holds the candidate votes, the
    # second the other data (non-voters, blank and invalid ballots).
    tbodies = soup.find_all('tbody')
    candidate_table, summary_table = tbodies[0], tbodies[1]
    results = {}
    for row in candidate_table.find_all('tr'):
        # The count is the <td> following the label cell; find_next_sibling
        # skips any text node between the two cells.
        results[row.td.text] = _parse_count(row.td.find_next_sibling('td').text)
    for row in summary_table.find_all('tr'):
        if row.td.text in ('Blancs', 'Nuls', 'Abstentions'):
            results[row.td.text] = _parse_count(row.td.find_next_sibling('td').text)
    return dept, results

# Try the scraper on a single page: take one link from the set (which one
# comes first is arbitrary) and append it to the base URL of the site.
url = "http://elections.interieur.gouv.fr/presidentielle-2017/" + list(unique_links)[0]

# Notebook display of the (name, votes) pair returned for that page.
extract_candidate_votes(url)

('Haute-Garonne (31)',
 {'Abstentions': 220547,
  'Blancs': 60546,
  'M. Emmanuel MACRON': 436941,
  'Mme Marine LE PEN': 166668,
  'Nuls': 22130})

# Scrape every linked page; names and vote dicts are kept in matching order.
scraped = [
    extract_candidate_votes("http://elections.interieur.gouv.fr/presidentielle-2017/" + link)
    for link in unique_links
]
depts = [name for name, _ in scraped]
results = [votes for _, votes in scraped]

import pandas as pd

# One row per scraped page (indexed by its name), one column per vote label.
df_votes = pd.DataFrame(results, index=depts)

# Notebook preview of the first ten rows.
df_votes.head(10)

# Column totals over all rows, displayed as a one-column frame.
df_votes.sum(axis=0).to_frame(name='Somme des votes')

# IPython magic (not plain Python): render matplotlib figures inline.
%matplotlib inline
import matplotlib.pyplot as plt
# NOTE(review): newer matplotlib releases appear to have renamed the
# 'seaborn-*' styles — confirm against the installed version.
plt.style.use('seaborn-talk')

# Bar chart of the column totals, largest first.
df_votes.sum(axis=0).to_frame(name='Somme des inscrits').sort_values(by='Somme des inscrits', ascending=False).plot.bar()

<matplotlib.axes._subplots.AxesSubplot at 0x108d7abe0>

# Share of each column in the grand total, in percent, plotted largest first.
category_totals = df_votes.sum(axis=0)
category_share = (category_totals / category_totals.sum() * 100).to_frame(name='% des inscrits')
category_share.sort_values(by='% des inscrits', ascending=False).plot.bar()

<matplotlib.axes._subplots.AxesSubplot at 0x108e0c828>

# Row totals (sum over all vote columns), smallest first.
vote_sum = df_votes.sum(axis=1).sort_values(ascending=True)
# Reorder the rows by that total through a temporary 'total' column.
df_votes['total'] = vote_sum
df_votes = df_votes.sort_values(by='total', ascending=True)
# Pass `axis` by keyword: newer pandas releases no longer accept it
# positionally in DataFrame.drop.
df_votes = df_votes.drop('total', axis=1)

# Horizontal stacked bars, one bar per row, in the sorted order.
fig, ax = plt.subplots(figsize=(10, 12))
df_votes.plot.barh(ax=ax, stacked=True, fontsize=8, width=1)
plt.tight_layout()

import shapefile

# Open the shapefile and pull out its geometries, its attribute rows and
# the descriptors of its attribute fields.
r = shapefile.Reader(r"files/departement_shapes/DEPARTEMENT.shp")
shapes = r.shapes()
records = r.records()
fields = r.fields

import numpy as np
from matplotlib.collections import LineCollection

def draw_departements_and_centroids(ax, records, shapes, radii=None):
    """Outline every shape on ``ax`` and mark each one with a disc.

    Each disc is centred on the mean of the shape's points and takes its
    radius from the matching entry of ``radii``; when ``radii`` is None a
    radius of 10000 is used for each record. ``records`` is iterated in step
    with ``shapes`` but its values are not read.
    """
    if radii is None:
        radii = [10000] * len(records)
    for _record, outline, disc_radius in zip(records, shapes, radii):
        xs, ys = zip(*outline.points)
        coords = np.array([xs, ys]).T

        starts = outline.parts
        if len(starts) == 1:
            rings = [coords]
        else:
            # Consecutive start offsets delimit every part but the last one,
            # which runs to the end of the point list.
            rings = [coords[begin:end] for begin, end in zip(starts, starts[1:])]
            rings.append(coords[starts[-1]:])

        border = LineCollection(rings, antialiaseds=(1,))
        border.set_edgecolors('k')
        border.set_linewidth(0.3)
        ax.add_collection(border)

        ax.add_artist(plt.Circle(coords.mean(axis=0), radius=disc_radius, alpha=0.7))

# Overview map: outlines plus discs of the default radius.
plt.figure(figsize=(10, 10))
ax = plt.subplot(111)

draw_departements_and_centroids(ax, records, shapes)

# Attribute table of the shapefile; column names come from the field
# descriptors, skipping the first one (presumably pyshp's deletion flag —
# TODO confirm).
df = pd.DataFrame(records, columns=[f[0] for f in fields[1:]])
# Frame the plot on the extent of the X_CHF_LIEU / Y_CHF_LIEU columns,
# with a margin of 100000 units on every side.
xmin, xmax = df['X_CHF_LIEU'].min(), df['X_CHF_LIEU'].max()
ymin, ymax = df['Y_CHF_LIEU'].min(), df['Y_CHF_LIEU'].max()
plt.xlim(xmin - 100000, xmax + 100000)
plt.ylim(ymin - 100000, ymax + 100000)
plt.axis('off')

(71326.0, 1328512.0, 6008968.0, 7159443.0)

def get_radii(label_idx, iterable=df.CODE_DEPT):
    """Gather the values of one ``df_votes`` column, ordered by code.

    The column is chosen by position (``label_idx``). For every code in
    ``iterable``, the rows whose index label contains the literal text
    "(<code>)" are selected and their values appended, so the result follows
    the order of ``iterable``. Returns the collected values as a NumPy array.
    """
    column = df_votes[df_votes.columns[label_idx]]
    return np.array([
        column[column.index.str.contains("({})".format(code), regex=False)].values
        for code in iterable
    ])

fig = plt.figure(figsize=(10, 15))

# The axis limits are identical for every panel, so compute them once
# instead of on each iteration.
xmin, xmax = df['X_CHF_LIEU'].min(), df['X_CHF_LIEU'].max()
ymin, ymax = df['Y_CHF_LIEU'].min(), df['Y_CHF_LIEU'].max()

# One panel per column of df_votes, laid out on a 3 x 2 grid.
for label_idx, column_label in enumerate(df_votes.columns):
    ax = plt.subplot(3, 2, label_idx + 1)

    # Disc radii are the column values divided by 6.
    radii = get_radii(label_idx) / 6
    draw_departements_and_centroids(ax, records, shapes, radii)

    ax.set_xlim(xmin - 100000, xmax + 100000)
    ax.set_ylim(ymin - 100000, ymax + 100000)
    ax.axis('off')
    ax.set_title(column_label)

import networkx as nx

# Two-way lookup between a CODE_DEPT value and its row position in df.
row_positions = np.arange(df.shape[0])
code_to_idx = dict(zip(df.CODE_DEPT, row_positions))
idx_to_code = dict(zip(row_positions, df.CODE_DEPT))

# One point per shape: the mean of its outline points.
centroids = np.array([np.array(outline.points).mean(axis=0) for outline in shapes])

from scipy.spatial import KDTree

# Spatial index over the centroids, used for nearest-neighbour queries.
kdtree = KDTree(centroids)

# Notebook display: shape of the indexed point array.
kdtree.data.shape

(96, 2)

# Probe: distances and indices of the 3 points nearest to point #2
# (the query point itself comes back first, at distance 0).
kdtree.query(kdtree.data[2], 3)

(array([     0.        ,  48030.21679557,  90082.12623391]),
 array([ 2, 44, 89]))

# Probe: row total of the entry whose index label contains "(12)".
vote_sum[vote_sum.index.str.contains("({})".format(12), regex=False)].values[0]

218179

# Map each CODE_DEPT value to the row total of the first index label that
# contains "(<code>)".
pop_dict = {
    code: vote_sum[vote_sum.index.str.contains("({})".format(code), regex=False)].values[0]
    for code in df.CODE_DEPT
}

# Build a neighbourhood graph: one node per CODE_DEPT value, linked to the
# nodes of its nearest centroids.
G = nx.Graph()
for dept_id in df.CODE_DEPT:
    G.add_node(dept_id)
    # k=7 returns seven hits; the first one is skipped below because it is
    # expected to be the query point itself.
    _dists, neighbors = kdtree.query(kdtree.data[code_to_idx[dept_id]], k=7)
    for neighbor in neighbors[1:]:
        neighbor_id = idx_to_code[neighbor]
        # Edge weight is the negated row total of the neighbour.
        weight = -(pop_dict[neighbor_id])
        G.add_edge(dept_id, neighbor_id, weight=weight)

# Notebook display of the last weight assigned in the loop.
weight

-428840

# Start every node at the centroid of its shape.
initial_pos = {idx_to_code[i]: centroids[i] for i in range(centroids.shape[0])}

nx.draw(G, pos=initial_pos, with_labels=True)

/Users/kappamaki/anaconda/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py:126: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  b = plt.ishold()
/Users/kappamaki/anaconda/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py:138: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  plt.hold(b)
/Users/kappamaki/anaconda/lib/python3.5/site-packages/matplotlib/__init__.py:917: UserWarning: axes.hold is deprecated. Please remove it from your matplotlibrc and/or style files.
  warnings.warn(self.msg_depr_set % key)
/Users/kappamaki/anaconda/lib/python3.5/site-packages/matplotlib/rcsetup.py:152: UserWarning: axes.hold is deprecated, will be removed in 3.0
  warnings.warn("axes.hold is deprecated, will be removed in 3.0")

# Spring layout seeded with the centroid positions: node '75' is kept fixed,
# only 2 iterations are run, and edge weights are read from the 'weight'
# attribute.
pos=nx.spring_layout(G, pos=initial_pos, fixed=['75'], iterations=2, k=1e9, weight='weight')

# Draw the graph at the adjusted positions.
nx.draw(G, pos, with_labels=True)

/Users/kappamaki/anaconda/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py:126: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  b = plt.ishold()
/Users/kappamaki/anaconda/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py:138: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  plt.hold(b)
/Users/kappamaki/anaconda/lib/python3.5/site-packages/matplotlib/__init__.py:917: UserWarning: axes.hold is deprecated. Please remove it from your matplotlibrc and/or style files.
  warnings.warn(self.msg_depr_set % key)
/Users/kappamaki/anaconda/lib/python3.5/site-packages/matplotlib/rcsetup.py:152: UserWarning: axes.hold is deprecated, will be removed in 3.0
  warnings.warn("axes.hold is deprecated, will be removed in 3.0")

# Node sizes follow the row totals (divided by 500); edges are hidden by a
# zero line width. The former `scale=4` keyword is dropped: positions are
# supplied through `pos`, so it never affected the drawing, and recent
# networkx releases reject keywords they do not recognise.
nx.draw(G, pos, with_labels=True, node_size=[pop_dict[i]/500 for i in G.nodes()], alpha=0.7, width=0.)

/Users/kappamaki/anaconda/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py:126: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  b = plt.ishold()
/Users/kappamaki/anaconda/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py:138: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  plt.hold(b)
/Users/kappamaki/anaconda/lib/python3.5/site-packages/matplotlib/__init__.py:917: UserWarning: axes.hold is deprecated. Please remove it from your matplotlibrc and/or style files.
  warnings.warn(self.msg_depr_set % key)
/Users/kappamaki/anaconda/lib/python3.5/site-packages/matplotlib/rcsetup.py:152: UserWarning: axes.hold is deprecated, will be removed in 3.0
  warnings.warn("axes.hold is deprecated, will be removed in 3.0")

# Add a combined column: abstentions + invalid ballots + blank ballots.
df_votes['abstention, nuls, blancs'] = (
    df_votes['Abstentions'] + df_votes['Nuls'] + df_votes['Blancs']
)

df_votes.columns

Index(['Abstentions', 'Blancs', 'M. Emmanuel MACRON', 'Mme Marine LE PEN',
       'Nuls', 'abstention, nuls, blancs'],
      dtype='object')

def get_radii_by_col(col_label, iterable=df.CODE_DEPT):
    """Gather the values of column ``col_label`` of ``df_votes``, by code.

    For every code in ``iterable``, the rows whose index label contains the
    literal text "(<code>)" are selected and their values collected, in
    iteration order. Returns the collected values as a NumPy array.
    """
    column = df_votes[col_label]
    per_code = (
        column[column.index.str.contains("({})".format(code), regex=False)].values
        for code in iterable
    )
    return np.array(list(per_code))

# One panel per selected column; node sizes follow that column's values
# (divided by 1000), looked up in the node order of the graph.
fig = plt.figure(figsize=(15, 4), dpi=200)
for ind, col_label in enumerate(['M. Emmanuel MACRON', 'Mme Marine LE PEN', 'abstention, nuls, blancs']):
    ax = plt.subplot(1, 3, ind+1)
    radii = get_radii_by_col(col_label, iterable=G.nodes()) / 1000
    # No `scale=` keyword: positions come from `pos`, so it never affected
    # the drawing, and recent networkx releases reject unknown keywords.
    nx.draw(G, pos, with_labels=True, node_size=radii, ax=ax, alpha=0.6, width=0., font_size=7)
    ax.set_title(col_label)
    # Same data units on both axes so the layout is not stretched.
    ax.axis('equal')

/Users/kappamaki/anaconda/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py:126: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  b = plt.ishold()
/Users/kappamaki/anaconda/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py:138: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  plt.hold(b)
/Users/kappamaki/anaconda/lib/python3.5/site-packages/matplotlib/__init__.py:917: UserWarning: axes.hold is deprecated. Please remove it from your matplotlibrc and/or style files.
  warnings.warn(self.msg_depr_set % key)
/Users/kappamaki/anaconda/lib/python3.5/site-packages/matplotlib/rcsetup.py:152: UserWarning: axes.hold is deprecated, will be removed in 3.0
  warnings.warn("axes.hold is deprecated, will be removed in 3.0")

	Abstentions	Blancs	M. Emmanuel MACRON	Mme Marine LE PEN	Nuls
Haute-Garonne (31)	220547	60546	436941	166668	22130
Loiret (45)	103643	30731	195004	113735	9508
Aveyron (12)	43933	16665	109340	40838	7403
Haute-Marne (52)	32083	9057	45192	44331	3456
Territoire de Belfort (90)	23202	6743	36340	26121	2873
Haute-Savoie (74)	127886	37148	249127	113718	10282
Val-d'Oise (95)	204202	42933	342000	129536	11664
Orne (61)	45577	14762	88484	55070	4798
Bouches-du-Rhône (13)	364023	81855	519335	378455	26391
Wallis et Futuna (986)	2369	76	4715	1243	61

Des cartes du second tour de l'élection présidentielle 2017

Obtention des données depuis le site du ministère de l'intérieur et analyse sommaire

Cartes

Conclusions

Comments

	Somme des votes
Abstentions	12101416
Blancs	3019724
M. Emmanuel MACRON	20753798
Mme Marine LE PEN	10644118
Nuls	1049532