%load_ext autoreload
%autoreload 2
from importlib import reload
import matplotlib.pyplot as plt
from matplotlib.cm import viridis
from matplotlib.cm import Greys
%matplotlib inline
import numpy as np
from sklearn.manifold import TSNE
from sklearn.metrics import pairwise
import sklearn as sk
import community
import seaborn as sns
import networkx as nx
import pandas as pd
import subprocess
import sys
import pickle
We simplify the graph a little for this example by removing the low-weight edges and the low-degree nodes. You can modify the thresholds in the code below.
# Load the edge list and prune it so the example stays small.
file = 'GOTbook1.edges'
isDirected = False
# Comma-separated rows; the three columns after the two node ids are parsed
# as the edge attributes Type (str), weight (int) and book (int).
G = nx.read_edgelist(
    file,
    delimiter=",",
    data=(("Type", str), ("weight", int), ("book", int)),
    create_using=nx.Graph(),
)

# Drop every edge whose weight is below 5.
ws = nx.get_edge_attributes(G, "weight")
to_remove = [edge for edge, w in ws.items() if w < 5]
G.remove_edges_from(to_remove)

# Then drop every node whose degree is below 3.
# G.degree() is a plain dict in networkx 1.x but a view that iterates over
# (node, degree) pairs in networkx >= 2.0; going through dict() works with
# both, whereas `for n in deg: deg[n]` fails on the 2.x view.
deg = G.degree()
to_remove = [node for node, d in dict(deg).items() if d < 3]
G.remove_nodes_from(to_remove)
#G = G.to_directed()
print("nb Edges", G.number_of_edges())
print("nb Nodes", G.number_of_nodes())

# Relabel the nodes with integers; the original label is kept in the
# "name" node attribute.
G = nx.convert_node_labels_to_integers(G, label_attribute="name")
dirTemp = "temp/" #directory for manipulating graphs and embedding
Pickle is just a way to save a Python object to a file so that we can reload it later in exactly the same form.
# Save the graph to disk; the with-block guarantees the file is flushed and
# closed once the dump is done (the bare open() call never closed it).
with open(dirTemp + "pickledGraph.p", "wb") as pickle_file:
    pickle.dump(G, pickle_file)
This section uses the GEM library.
It is included in the directory I provide, but you can download the latest version at: https://github.com/palash1992/GEM
You can install it properly as a library, or just load the directory with the command below.
sys.path.insert(0, 'algorithms/GEM-master')
from gem.embedding.gf import GraphFactorization
from gem.embedding.hope import HOPE
from gem.embedding.lap import LaplacianEigenmaps
# Each embedding method has its own hyper-parameters,
# but every one of them takes the number of dimensions d.
# Alternative model to try: HOPE(d=8, beta=0.01)
model = LaplacianEigenmaps(d=8)
embedding = model
embedding.learn_embedding(
    graph=G.to_directed(),
    edge_f=None,
    is_weighted=False,
    no_python=True,
)

# The array shape is written as the first line of the file; comments=""
# means that header line gets no comment prefix.
shape = str(embedding.get_embedding().shape)
real_embedding = np.real(embedding.get_embedding())
np.savetxt(
    dirTemp + "LE.emb",
    real_embedding,
    header=shape,
    comments="",
    delimiter=" ",
)
This section requires the compiled node2vec program. You need to compile it for your own computer.
I included the sources in the "algorithms" directory; otherwise you can download them here: https://github.com/snap-stanford/snap and run the "make" command. node2vec is in the "examples" directory.
On a Mac, if you do not have gcc, install the command line tools:
xcode-select --install
"""
-i:Input graph path (default:'graph/karate.edgelist')
-o:Output graph path (default:'emb/karate.emb')
-d:Number of dimensions. Default is 128 (default:128)
-l:Length of walk per source. Default is 80 (default:80)
-r:Number of walks per source. Default is 10 (default:10)
-k:Context size for optimization. Default is 10 (default:10)
-e:Number of epochs in SGD. Default is 1 (default:1)
-p:Return hyperparameter. Default is 1 (default:1)
-q:Inout hyperparameter. Default is 1 (default:1)
-v Verbose output.
-dr Graph is directed.
-w Graph is weighted.
-ow Output random walks instead of embeddings.
"""
# Export the graph, keeping the "weight" edge attribute, as input for node2vec.
# (Use data=False instead to export the edges without any attribute.)
nx.write_edgelist(G, "temp/tempGraph.graph", data=["weight"])

# Command line for the compiled node2vec binary. Uncomment an option to
# override it; the values shown are the defaults listed in the help text.
args = [
    "./algorithms/node2vec",
    "-i:temp/tempGraph.graph",
    "-o:temp/node2vec.emb",
    "-d:%d" % 8,  # dimension
    # "-l:%d" % 80,  # walk length
    # "-r:%d" % 10,  # number of walks
    # "-k:%d" % 10,  # context size
    # "-e:%d" % 1,   # max iterations (epochs)
    # "-p:%f" % 1,   # return hyperparameter
    # "-q:%f" % 1,   # inout hyperparameter
    "-v",
    # "-dr",
    # "-w",
]

# Hand the argument list straight to the program: no shell is involved, so
# there is no command string to assemble and nothing to quote or escape.
# check_output raises CalledProcessError if node2vec exits with an error.
subprocess.check_output(args)
You need the struc2vec code, which you can download here:
https://github.com/leoribeiro/struc2vec
(However, the version I provide is already compatible with Python 3; the original version has problems with it.)
You also need to install some packages:
pip install fastdtw / conda install -c bioconda fastdtw
pip install gensim / conda install gensim
# Export the graph without any edge data as input for struc2vec
# (this overwrites temp/tempGraph.graph).
nx.write_edgelist(G, "temp/tempGraph.graph", data=False)

# Command line for the struc2vec script, one argv entry per token.
# sys.executable is the interpreter running this code, so the script is
# launched with the same Python rather than with whichever "python"
# happens to be first on the PATH.
args = [
    sys.executable,
    "algorithms/struc2vec-master/src/main.py",
    "--input", "temp/tempGraph.graph",
    "--output", "temp/struc2vec.emb",
    "--dimensions", "8",
    # "--num-walks", "20",
    # "--walk-length", "80",
    # "--window-size", "5",
    # "--OPT1", "True", "--OPT2", "True", "--OPT3", "True", "--until-layer", "6",
]

# The argument list is passed directly, without a shell. On failure
# check_output raises CalledProcessError; the captured output of the failed
# run is then available on the exception as `.output`.
output = subprocess.check_output(args)
#print(output)
# Load the node2vec embedding as an array, skipping the first line of the file.
# Passing the path (instead of an open file object) lets numpy open the file
# and close it again once it has been read.
embeddingFile = "temp/node2vec.emb"
theEmbedding = np.loadtxt(embeddingFile, delimiter=" ", skiprows=1)