In [3]:
%load_ext autoreload
%autoreload 2
from importlib import reload
    
import matplotlib.pyplot as plt
from matplotlib.cm import viridis
from matplotlib.cm import Greys


%matplotlib inline  
import numpy as np

from sklearn.manifold import TSNE
from sklearn.metrics import pairwise
import sklearn as sk
import community
import seaborn as sns

import networkx as nx
import pandas as pd
import subprocess
import sys
import pickle

We start by loading a graph

We simplify it a little for this example by removing low-weight edges and then low-degree nodes. You can modify these thresholds.

In [23]:
# Input edge list: one comma-separated edge per line, followed by three
# attribute columns (Type, weight, book).
file = 'GOTbook1.edges'
isDirected = False

# Pruning thresholds -- tune these to keep a larger or smaller graph.
MIN_EDGE_WEIGHT = 5  # edges with a smaller weight are dropped
MIN_DEGREE = 3       # nodes with a smaller degree (after edge pruning) are dropped

G = nx.read_edgelist(
    file,
    delimiter=",",
    data=(("Type", str), ("weight", int), ("book", int)),
    create_using=nx.Graph(),
)

# 1) Drop the weak edges. Keys of `ws` are the edge tuples themselves.
ws = nx.get_edge_attributes(G, "weight")
to_remove = [edge for edge, w in ws.items() if w < MIN_EDGE_WEIGHT]
G.remove_edges_from(to_remove)

# 2) Drop the poorly connected nodes.
# dict(...) makes this independent of the networkx version: G.degree() is a
# plain dict in 1.x but a view yielding (node, degree) pairs in 2.x, where the
# former `for n in deg: deg[n]` lookup fails.
deg = dict(G.degree())
to_remove = [node for node, d in deg.items() if d < MIN_DEGREE]
G.remove_nodes_from(to_remove)

print("nb Edges", G.number_of_edges())
print("nb Nodes", G.number_of_nodes())

# Relabel nodes as consecutive integers; the original label is kept in the
# "name" node attribute.
G = nx.convert_node_labels_to_integers(G, label_attribute="name")
nb Edges 340
nb Nodes 81
In [24]:
import os

# Scratch directory for the graph files and embeddings used by this notebook.
# The trailing slash matters: paths are built by plain string concatenation
# (e.g. dirTemp + "LE.emb").
dirTemp = "temp/"

# Create it up front so that writing into it does not fail with
# FileNotFoundError on a fresh checkout; a no-op when it already exists.
os.makedirs(dirTemp, exist_ok=True)

We save it with "pickle" so that we can load it again later.

Pickle is just a way to save a python object in a file such that we can reload it later in the exact same form

In [25]:
# Serialize the pruned graph into the scratch directory. The `with` block
# guarantees the file is flushed and closed even if pickling raises.
with open(dirTemp + "pickledGraph.p", "wb") as pickle_file:
    pickle.dump(G, pickle_file)

Running the LE (Laplacian Eigenmaps) or HOPE algorithm

Uses the GEM library

It is included in the directory I provide, but you can download the latest version at: https://github.com/palash1992/GEM

You can install it properly as a library, or just add its directory to the Python path with the command below.

In [16]:
# Make the bundled GEM sources importable. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate entries in sys.path.
GEM_PATH = 'algorithms/GEM-master'
if GEM_PATH not in sys.path:
    sys.path.insert(0, GEM_PATH)
In [17]:
from gem.embedding.gf       import GraphFactorization
from gem.embedding.hope     import HOPE
from gem.embedding.lap      import LaplacianEigenmaps
In [18]:
# Each GEM embedder has its own hyper-parameters, but all of them take the
# number of dimensions `d`.
# To try HOPE instead, build the model with: HOPE(d=8, beta=0.01)
embedding = model = LaplacianEigenmaps(d=8)

# Fit on a directed version of the graph, with edge weights ignored.
embedding.learn_embedding(
    graph=G.to_directed(),
    edge_f=None,
    is_weighted=False,
    no_python=True,
)

# The first line of the output file is the string form of the array shape.
shape = str(embedding.get_embedding().shape)

# Only the real part is stored. comments="" writes the header line as-is,
# without numpy's default "# " prefix.
np.savetxt(
    fname=dirTemp + "LE.emb",
    X=np.real(embedding.get_embedding()),
    delimiter=" ",
    header=shape,
    comments="",
)
Laplacian matrix recon. error (low rank): 9.364159

Running The node2vec algorithm

Requires the compiled node2vec program. You need to compile it for your own computer.

I included the sources in the "algorithms" directory; otherwise you can download them here: https://github.com/snap-stanford/snap and run the "make" command. node2vec is in the "example" directory.

For mac, if you do not have gcc: install command line tools:

xcode-select --install

In [26]:
"""
 -i:Input graph path (default:'graph/karate.edgelist')
   -o:Output graph path (default:'emb/karate.emb')
   -d:Number of dimensions. Default is 128 (default:128)
   -l:Length of walk per source. Default is 80 (default:80)
   -r:Number of walks per source. Default is 10 (default:10)
   -k:Context size for optimization. Default is 10 (default:10)
   -e:Number of epochs in SGD. Default is 1 (default:1)
   -p:Return hyperparameter. Default is 1 (default:1)
   -q:Inout hyperparameter. Default is 1 (default:1)
   -v Verbose output. 
   -dr Graph is directed. 
   -w Graph is weighted. 
   -ow Output random walks instead of embeddings. 
"""
# Export the graph as an edge list for the external node2vec binary.
# The "weight" column is written, but the -w flag is left off below, so the
# run is reported as unweighted. Use data=False to write bare edges instead.
graph_path = dirTemp + "tempGraph.graph"
nx.write_edgelist(G, graph_path, data=["weight"])

# Command line as an argument list: no shell is involved, so no quoting or
# injection issues if a path ever contains spaces or special characters.
args = [
    "./algorithms/node2vec",
    "-i:" + graph_path,
    "-o:" + dirTemp + "node2vec.emb",
    "-d:%d" % 8,  # number of dimensions
    "-v",         # verbose output
]
# Optional flags (see the usage text above), appended the same way:
#   "-l:%d" walk length      "-r:%d" number of walks   "-k:%d" context size
#   "-e:%d" SGD epochs       "-p:%f" return parameter  "-q:%f" in-out parameter
#   "-dr"   directed graph   "-w"    weighted graph

# check_output raises CalledProcessError if the binary exits with a non-zero
# status; the captured log is decoded so that it prints as readable text
# rather than as a raw bytes repr.
node2vec_log = subprocess.check_output(args)
print(node2vec_log.decode(errors="replace"))
Out[26]:
b'\nAn algorithmic framework for representational learning on graphs. [Oct  1 2018]\n================================================================================\nInput graph path (-i:)=temp/tempGraph.graph\nOutput graph path (-o:)=temp/node2vec.emb\nNumber of dimensions. Default is 128 (-d:)=8\nLength of walk per source. Default is 80 (-l:)=80\nNumber of walks per source. Default is 10 (-r:)=10\nContext size for optimization. Default is 10 (-k:)=10\nNumber of epochs in SGD. Default is 1 (-e:)=1\nReturn hyperparameter. Default is 1 (-p:)=1\nInout hyperparameter. Default is 1 (-q:)=1\nVerbose output. (-v)=YES\nGraph is directed. (-dr)=NO\nGraph is weighted. (-w)=NO\nOutput random walks instead of embeddings. (-ow)=NO\nRead 340 lines from temp/tempGraph.graph\n\rPreprocessing progress: 0.00% \n\rWalking Progress: 0.00%\n\rLearning Progress: 0.00% \rLearning Progress: 15.43% \rLearning Progress: 30.86% \rLearning Progress: 46.30% \rLearning Progress: 61.73% \rLearning Progress: 77.16% \rLearning Progress: 92.59% \n'

Running the struc2vec algorithm

You need the struc2vec code, which you can download here:

https://github.com/leoribeiro/struc2vec

(However, the version I provide is already compatible with python3, the original version has problems with it)

You also need to install some packages:

pip install fastdtw / conda install -c bioconda fastdtw

pip install gensim / conda install gensim

In [20]:
# Export the graph as a bare edge list (no attributes) for struc2vec.
graph_path = dirTemp + "tempGraph.graph"
nx.write_edgelist(G, graph_path, data=False)

# Run the script with the interpreter of the current kernel (sys.executable)
# rather than whatever `python` happens to be first on PATH, so the script
# sees the same environment (gensim, fastdtw) as this notebook.
# Passing a list avoids going through a shell.
args = [
    sys.executable,
    "algorithms/struc2vec-master/src/main.py",
    "--input", graph_path,
    "--output", dirTemp + "struc2vec.emb",
    "--dimensions", "8",
]
# Optional settings, appended as extra "--flag", "value" pairs:
#   --num-walks 20   --walk-length 80   --window-size 5
#   --OPT1 True --OPT2 True --OPT3 True --until-layer 6

# Raises subprocess.CalledProcessError if the script exits with a non-zero
# status; the captured output is then available on the exception (`.output`).
output = subprocess.check_output(args)

Loading an embedding from a file

In [4]:
# Embedding file to load; swap the file name to load another embedding
# written into the scratch directory.
embeddingFile = dirTemp + "node2vec.emb"

# Pass the path itself so numpy opens *and closes* the file (the previous
# open(...) handle was never closed). skiprows=1 skips the header line.
# NOTE(review): node2vec output presumably carries the node id in the first
# column -- confirm before treating each row as a pure vector.
theEmbedding = np.loadtxt(embeddingFile, delimiter=" ", skiprows=1)