Building graphs with Japanese vocabulary and kanji

This post is a quick and dirty exploration of one aspect of my Japanese learning. The gist is the following: I use flashcard software called Anki to guide my study. I keep two decks for Japanese: one contains my vocabulary and is recognition-only, while the other contains my kanji cards and forces me to write the Japanese characters and recall their structure.

In this post, I use the data from both decks combined to create a graph of how the information for a given learning day is connected.

Simple text exploration of the data

In [24]:
# necessary imports
from pylab import *
%matplotlib inline
In [25]:
from IPython.html.widgets import interact

First, let's read the raw text files containing the data from my decks. files/kanji.csv contains the kanji I have reviewed today, while files/jp_vocab.csv contains all the words in my Japanese vocabulary deck.

In [26]:
kanji = genfromtxt('files/kanji.csv', dtype=np.str, usecols=0, delimiter=',')
In [27]:
myvocab = genfromtxt('files/jp_vocab.csv', dtype=np.str, usecols=0, delimiter=',')

The characters read from these files are UTF-8 encoded byte strings. To work with them, I'm converting them to Python unicode.

In [28]:
from codecs import decode
kanji = [decode(k, 'utf-8') for k in kanji]
myvocab = [decode(w, 'utf-8') for w in myvocab]
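(A side note for Python 3 readers: this manual decoding step is only needed on Python 2; on Python 3 you can decode at read time. A minimal sketch, assuming the same one-column-then-comma CSV layout as the files above, with an in-memory stand-in for the file:)

```python
import io

# Python 3 style: decode while reading instead of after the fact.
# The bytes below stand in for the contents of files/kanji.csv
# (first column = the character, rest of the card after the comma).
raw = u"淡,rest of card\n貯,rest of card\n".encode("utf-8")
with io.TextIOWrapper(io.BytesIO(raw), encoding="utf-8") as f:
    kanji_py3 = [line.split(",")[0] for line in f]
print(kanji_py3)
```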
In [29]:
for k in kanji:
    print k, 
淡 貯 腹 妃 翁 葬 鉄 賀 委 屈 漏 穏 料 庸 累 酌 糾 陵 星 斜 斉 沈 尾 展 雰

Let's now write a function that filters my vocabulary list according to whether a given kanji is present in a word or not.

In [30]:
def get_associated_vocab(kanji):
    return filter(lambda s: kanji in s, myvocab)
In [31]:
words = get_associated_vocab(kanji[1])
for w in words:
    print w,
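(Another Python 3 note: there, `filter` returns a lazy iterator rather than a list. An equivalent list-comprehension version, sketched with a hypothetical toy vocabulary so it runs standalone:)

```python
def get_associated_vocab_lc(kanji_char, vocab):
    # Keep only the words that contain the given character.
    return [w for w in vocab if kanji_char in w]

toy_vocab = [u"料理", u"材料", u"給料", u"貯金"]
print(get_associated_vocab_lc(u"料", toy_vocab))
```

This behaves identically on Python 2 and 3, which `filter` does not.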

We can explore this interactively using the IPython notebook's interact tool:

In [32]:
def print_my_vocab(kanji_char):
    words = get_associated_vocab(kanji_char)
    print "You selected character " + kanji_char
    if len(words) != 0:
        print "Matches in your vocabulary found!"
        for w in words:
            print w
    else: 
        print "No match in your vocabulary"
In [33]:
interact(lambda ind: print_my_vocab(kanji[ind]),
         ind=(0, len(kanji) - 1))
You selected character 料
Matches in your vocabulary found!
料理
晩御飯 は 日本 料理 レストランで食べました。おいしかったお好み焼き食べました。<br><br>
材料
小麦粉はケーキの主な材料だ。
食料品
給料
Out[33]:
<function __main__.<lambda>>

Graphical exploration using NetworkX

NetworkX is a toolkit for the study of networks, written in Python. Inspired by this example from the gallery, we can leverage its graphical tools to visualize the data I have analyzed so far.

My aim here is to make a plot with all the kanji reviewed today and link them to their associated vocabulary.

While writing this post, I encountered some problems with the display of Japanese characters. It turns out that this was due to my matplotlib installation and its fonts, which didn't support Japanese characters. I solved this problem by copying one of my Japanese system fonts into the matplotlib fonts directory, located in my case at C:\Python27\Lib\site-packages\matplotlib\mpl-data\fonts. A helpful link in this process was http://stackoverflow.com/questions/23197124/display-non-ascii-japanese-characters-in-pandas-plot-legend.
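(A lighter-weight alternative to copying font files, sketched below: newer matplotlib versions (>= 3.2) can register a font file at runtime with `font_manager.fontManager.addfont`. The font path is a placeholder to adjust for your own system; the family name is the one used throughout this post.)

```python
import matplotlib
from matplotlib import font_manager  # noqa: F401 (used for the optional addfont call)

# Register a CJK font at runtime instead of copying it into mpl-data/fonts
# (hypothetical path -- point it at a Japanese font installed on your system):
# font_manager.fontManager.addfont("/path/to/a-japanese-font.ttf")

# Either way, select a family with Japanese glyphs for all plot text:
matplotlib.rcParams['font.family'] = 'Aozora Mincho'
print(matplotlib.rcParams['font.family'])
```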

In [34]:
import networkx as nx

Below is a simple test graph that shows how to display a node labelled with a Japanese character.

In [35]:
G = nx.Graph()
G.add_node(u"車")
nx.draw_networkx(G, font_family='Aozora Mincho')

This being done, we can move on to more serious things. We create our graph with the following code.

In [36]:
import datetime
In [37]:
figure(figsize=(10, 10))
G = nx.Graph()
today = datetime.date.today()
G.add_node(today)
for k in kanji:
    G.add_node(k)
    G.add_edge(today, k)
    for expr in get_associated_vocab(k):
        G.add_edge(k, expr)
nx.draw_networkx(G, font_family='Aozora Mincho', font_size=15, node_size=1000)

axis('off');

This is quite nice, but we can improve it a little bit by using some coloring, inspired by this example.

In [63]:
# creating the graph
G = nx.Graph()
today = datetime.date.today()
G.add_node(today)
for k in kanji:
    G.add_node(k)
    G.add_edge(today, k)
    for expr in get_associated_vocab(k):
        G.add_edge(k, expr)

# creating the node colors
node_color = []
for node in G:
    if node == today:
        node_color.append('w')
    elif node in kanji:
        node_color.append('r')
    else:
        node_color.append('y')

# plotting
figure(figsize=(10, 10))        
pos = nx.spring_layout(G)        
nx.draw(G, pos=pos, 
        with_labels=True,
        node_color=node_color,
        font_family='Aozora Mincho', font_size=15, 
        node_size=1000)

axis('off');

While using visualizations like the one above during the last few days, I have noticed that when there are a lot of words and characters to display, the graph gets messy. Therefore, I'll try to produce a more structured version of the previous graph in the next cells.

Producing the graphical output above involves two separate steps. The first step is computing the node positions for the final graph. The second is drawing the graph using those positions.
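(These two steps, made explicit on a toy two-node graph -- a sketch using names distinct from the post's `G` and `pos`; the `seed` argument of `spring_layout` is available in recent NetworkX versions:)

```python
import networkx as nx

# Step 1: compute a dictionary mapping each node to an (x, y) position.
G2 = nx.Graph()
G2.add_edge(u"料", u"料理")
layout = nx.spring_layout(G2, seed=0)
# Step 2 would then be: nx.draw(G2, pos=layout, with_labels=True)
print(sorted(layout.keys()))
```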

In the previous cell, we computed the positions of our nodes using nx.spring_layout(G). The positions computed by the spring algorithm are stored in a dictionary:

In [64]:
pos
Out[64]:
{u'\u5983': array([ 0.63895377,  0.67500245]),
 u'\u6589': array([ 0.03343059,  0.3185518 ]),
 u'\u6599\u7406': array([ 0.97669519,  0.64441187]),
 u'\u6c88': array([ 0.31158778,  0.16515432]),
 u'\u6de1': array([ 0.0082795 ,  0.60559574]),
 u'\u6f0f': array([ 0.10637716,  0.18282049]),
 datetime.date(2014, 11, 5): array([ 0.33843229,  0.39933709]),
 u'\u8caf': array([ 0.06449885,  0.24593801]),
 u'\u6599': array([ 0.81613867,  0.5326309 ]),
 u'\u659c': array([ 0.71547702,  0.32610618]),
 u'\u661f': array([ 0.47509264,  0.80911579]),
 u'\u59d4\u54e1': array([ 0.92043342,  0.23380841]),
 u'\u50be\u659c': array([ 0.96810298,  0.32317835]),
 u'\u6d41\u308c\u661f': array([ 0.39479764,  0.9914399 ]),
 u'\u7d66\u6599': array([ 0.92811008,  0.72305464]),
 u'\u7d2f': array([ 0.59474906,  0.01805415]),
 u'\u5e74\u8cc0': array([ 0.46854098,  0.        ]),
 u'\u5eb8': array([ 0.        ,  0.42477971]),
 u'\u5c3e': array([ 0.51802025,  0.22415128]),
 u'\u7fc1': array([ 0.10560111,  0.64988196]),
 u'\u8cc0': array([ 0.37718882,  0.04015944]),
 u'\u9244': array([ 0.32000005,  0.80690719]),
 u'\u5c48': array([ 0.53514343,  0.47579271]),
 u'\u885b\u661f': array([ 0.63198603,  0.98454265]),
 u'\u914c': array([ 0.5064797 ,  0.07996336]),
 u'\u7a4f': array([ 0.09442873,  0.75785548]),
 u'\u5c55\u89a7\u4f1a': array([ 0.21722116,  0.08730433]),
 u'\u5c55': array([ 0.17416823,  0.15397226]),
 u'\u98df\u6599\u54c1': array([ 0.89176716,  0.19228279]),
 u'\u5730\u4e0b\u9244': array([ 0.48069608,  1.        ]),
 u'\u738b\u5983': array([ 0.8761446 ,  0.82716321]),
 u'\u9244\u9aa8': array([ 0.16174776,  0.86162271]),
 u'\u5c0f\u9ea6\u7c89\u306f\u30b1\u30fc\u30ad\u306e\u4e3b\u306a\u6750\u6599\u3060\u3002': array([ 0.99668281,  0.55111055]),
 u'\u6750\u6599': array([ 0.83223969,  0.87208056]),
 u'\u6669\u5fa1\u98ef \u306f \u65e5\u672c \u6599\u7406 \u30ec\u30b9\u30c8\u30e9\u30f3\u3067\u98df\u3079\u307e\u3057\u305f\u3002\u304a\u3044\u3057\u304b\u3063\u305f\u304a\u597d\u307f\u713c\u304d\u98df\u3079\u307e\u3057\u305f\u3002<br><br>': array([ 0.99848304,  0.45215401]),
 u'\u846c': array([ 0.28533294,  0.0447483 ]),
 u'\u96f0': array([ 0.01251561,  0.51255702]),
 u'\u7a4f\u3084\u304b': array([ 0.22307882,  0.91305033]),
 u'\u9675': array([ 0.07011004,  0.37885549]),
 u'\u8179': array([ 0.13798064,  0.52279467]),
 u'\u7cfe': array([ 0.3268805 ,  0.62509542]),
 u'\u59d4': array([ 0.69909341,  0.16646906])}

My plan in the next cells is to compute the positions by myself instead of using the built-in algorithm.

In [40]:
# creating the graph
G = nx.Graph()
today = datetime.date.today()

for k in kanji:
    G.add_node(k)
    for expr in get_associated_vocab(k):
        G.add_edge(k, expr)

# creating the node colors
node_color = []
for node in G:
    if node in kanji:
        node_color.append('r')
    else:
        node_color.append('y')

# computing positions 
pos = {}
r1 = 0.3 * len(kanji) / 100.
r2 = r1 / 4.
theta = linspace(0, 2 * pi, len(kanji), endpoint=False)
for ind, k in enumerate(kanji):
    pos[k] = r1 * array([cos(theta[ind]), sin(theta[ind])])
    words = get_associated_vocab(k)
    alpha = linspace(0, 2 * pi, len(words), endpoint=False) + theta[ind] + pi / 4 + (pi / 4) * (ind % 2)
    for ind2, w in enumerate(words):
        if not w in pos:
            pos[w] = r1 * array([cos(theta[ind]), sin(theta[ind])]) + r2 * array([cos(alpha[ind2]), sin(alpha[ind2])])
        
# plotting
figure(figsize=(10, 10))                
nx.draw(G, pos=pos, 
        with_labels=True,
        node_color=node_color,
        font_family='Aozora Mincho', font_size=15, 
        node_size=1000)

axis('off');

This is still not satisfactory. So, based on the above layout, we will build a small particle simulation. First, we define a particle class modelling a point with mass and inertia that is repelled by the other particles and attached by spring-like links to a set of "neighbours".

In [138]:
class particle(object):
    def __init__(self, name, pos, fixed=False, mass=1., neighbours=None, stiffness=0.1):
        # inits a particle with a given mass and a given stiffness linking it to its neighbours
        # (neighbours defaults to None rather than [] to avoid the shared mutable default pitfall)
        self.pos = pos
        self.mass = mass
        self.fixed = fixed
        self.neighbours = neighbours if neighbours is not None else []
        self.speed = zeros((2))
        self.stiffness = stiffness
        self.name = name
        self.external_force = zeros((2))
        
    def move(self, dt):
        # moves the particle for a timestep dt
        self.speed += dt/self.mass * self.external_force
        self.pos += self.speed * dt
                
    def compute_spring_force(self):
        # computes the force exerted upon the node by its neighbours
        force_vector = zeros((2))
        for neighbour in self.neighbours:
            force_vector += self.stiffness * (particles[neighbour].pos - self.pos) 
        return force_vector
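(A quick standalone sanity check of the Euler step -- a sketch in which, unlike the class above, the force is passed in explicitly instead of being read from the global `particles` dict:)

```python
import numpy as np

class DemoParticle(object):
    def __init__(self, pos, mass=1.0, stiffness=0.1):
        self.pos = np.array(pos, dtype=float)
        self.speed = np.zeros(2)
        self.mass = mass
        self.stiffness = stiffness

    def spring_force_towards(self, other):
        # Hooke-like attraction towards a linked neighbour.
        return self.stiffness * (other.pos - self.pos)

    def move(self, force, dt):
        # Explicit Euler step, mirroring particle.move above.
        self.speed += dt / self.mass * force
        self.pos += self.speed * dt

a = DemoParticle([0.0, 0.0])
b = DemoParticle([1.0, 0.0])
a.move(a.spring_force_towards(b), dt=1.0)
print(a.pos)  # a has moved towards b along the x axis
```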

Next, we can code the simulation. First, we build the list of nodes we want to work with.

In [144]:
pos = nx.spring_layout(G)  
particles = {}
for k in kanji:
    neighbours = get_associated_vocab(k)
    particles[k] = particle(k, pos[k], fixed=True, mass=len(neighbours) + 1, neighbours=neighbours)
    for neigh in neighbours:
        particles[neigh] = particle(neigh, pos[neigh], fixed=False, neighbours=[k])
In [145]:
def get_pos_dict():
    # map every particle to its current position
    pos = {name: p.pos for name, p in particles.items()}
    pos[today] = [0., 0.]  # the date node is not simulated; pin it to the origin
    return pos
In [146]:
def plot_current_particles():
    figure(figsize=(10, 10))                
    nx.draw(G, pos=get_pos_dict(), 
        with_labels=True,
        node_color=node_color,
        font_family='Aozora Mincho', font_size=15, 
        node_size=1000)

Next, we build the iteration steps:

In [147]:
def iterate(dt):
    for current_p in particles:
        external_force = zeros((2))
        for other_p in particles:
            if current_p != other_p:
                # sum repulsive forces (elementwise, so this diverges if two
                # particles share a coordinate)
                diff = particles[current_p].pos - particles[other_p].pos
                external_force -= particles[other_p].mass / diff ** 2 * diff
        external_force += particles[current_p].compute_spring_force()
        particles[current_p].external_force = external_force
    for current_p in particles:
        particles[current_p].move(dt)
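(The elementwise division in `iterate` blows up as soon as two particles share a coordinate, which is what produces the `nan` further down. A possible fix -- a sketch, not part of the original simulation -- is a norm-based inverse-square repulsion with a small softening term:)

```python
import numpy as np

def softened_repulsion(p, q, mass_q, eps=1e-6):
    # Repulsion of p away from q with magnitude mass_q / |p - q|^2,
    # computed from the vector norm; the softening term eps keeps
    # coinciding particles from dividing by zero.
    d = np.asarray(p, dtype=float) - np.asarray(q, dtype=float)
    r2 = np.dot(d, d) + eps
    return mass_q * d / r2 ** 1.5

f = softened_repulsion([1.0, 0.0], [0.0, 0.0], mass_q=1.0)
f0 = softened_repulsion([0.3, 0.3], [0.3, 0.3], mass_q=1.0)
print(np.isfinite(f0).all())  # finite even for coinciding particles
```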

Let's move this graph.

In [168]:
iterate(10)
plot_current_particles()
In [86]:
def iterate_and_plot(dt):
    iterate(dt)
    figure(figsize=(10, 10))                
    nx.draw(G, pos=get_pos_dict(), 
        with_labels=True,
        node_color=node_color,
        font_family='Aozora Mincho', font_size=15, 
        node_size=1000)
    axis('off');
In [87]:
from IPython.html.widgets import ButtonWidget
from IPython.display import display, clear_output
In [92]:
b = ButtonWidget(description="iteration")
display(b)
b.on_click(lambda b: iterate_and_plot(0.1))
In [65]:
iterate_and_plot(0.02)
---------------------------------------------------------------------------
NetworkXError                             Traceback (most recent call last)
<ipython-input-65-da81b97f4cf4> in <module>()
----> 1 iterate_and_plot(0.02)

<ipython-input-54-11d0c0839f22> in iterate_and_plot(dt)
      7         node_color=node_color,
      8         font_family='Aozora Mincho', font_size=15,
----> 9         node_size=1000)
     10     axis('off');

C:\Python27\lib\site-packages\networkx\drawing\nx_pylab.pyc in draw(G, pos, ax, hold, **kwds)
    129         plt.hold(h)
    130     try:
--> 131         draw_networkx(G, pos=pos, ax=ax, **kwds)
    132         ax.set_axis_off()
    133         plt.draw_if_interactive()

C:\Python27\lib\site-packages\networkx\drawing\nx_pylab.pyc in draw_networkx(G, pos, with_labels, **kwds)
    262         pos = nx.drawing.spring_layout(G)  # default to spring layout
    263 
--> 264     node_collection = draw_networkx_nodes(G, pos, **kwds)
    265     edge_collection = draw_networkx_edges(G, pos, **kwds)
    266     if with_labels:

C:\Python27\lib\site-packages\networkx\drawing\nx_pylab.pyc in draw_networkx_nodes(G, pos, nodelist, node_size, node_color, node_shape, alpha, cmap, vmin, vmax, ax, linewidths, label, **kwds)
    373         xy = numpy.asarray([pos[v] for v in nodelist])
    374     except KeyError as e:
--> 375         raise nx.NetworkXError('Node %s has no position.'%e)
    376     except ValueError:
    377         raise nx.NetworkXError('Bad value in node positions.')

NetworkXError: Node datetime.date(2014, 11, 5) has no position.
In [59]:
particles[u"星"].move(1)
particles[u"星"].pos
Out[59]:
array([             nan,  -1.10393199e+11])

That's it! A brand new tool for visualizing the connections between your kanji deck and your vocabulary! As the nan above shows, the hand-rolled particle simulation still diverges when two nodes get too close, so for now the spring-layout plots remain the more robust option.
