import networkx as nx
import pandas as pd
import numpy as np
import collections
import seaborn as sns
import matplotlib.pyplot as plt
from operator import itemgetter
%matplotlib inline
G = nx.read_edgelist("airportData.edges")
print("#nodes: ",len(G.nodes()))
print("#edges: ",len(G.edges()))
print("#avg. clustering:",nx.average_clustering(G))
print("#Global clustering coefficient:",nx.transitivity(G))
degree_sequence = sorted([d for n, d in G.degree()], reverse=True)
sns.distplot(degree_sequence, kde=False, rug=False);
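# Note: distplot is deprecated in recent seaborn releases; sns.histplot(degree_sequence)
# is the modern equivalent (histogram without a KDE curve).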
degreeCount = collections.Counter(degree_sequence)
deg, cnt = zip(*degreeCount.items())
deg = np.array(list(deg))
cnt = np.array(list(cnt))
f, ax = plt.subplots(figsize=(7, 7))
ax.set(xscale="log", yscale="log")
sns.regplot(x=deg, y=cnt, fit_reg=False)
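# On log-log axes, a roughly linear decay of the degree counts suggests a
# heavy-tailed (power-law-like) degree distribution, typical of airport networks.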
## Checking top nodes for some centralities
# Top-5 airports by degree (number of direct connections)
TopDegree = nx.degree(G)
TopDegree = sorted(TopDegree, key=itemgetter(1), reverse=True)[:5]
TopDegree
# Closeness centrality: airports that are, on average, closest to all others
CL = nx.closeness_centrality(G)
CL = sorted(CL.items(), key=itemgetter(1), reverse=True)[:5]
CL
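# Note: current-flow betweenness requires a connected graph. If the edge list
# yields several components, one workaround (assuming only the main component
# is of interest) is to restrict the graph to the largest component first, e.g.:
# largest_cc = max(nx.connected_components(G), key=len)
# G = G.subgraph(largest_cc).copy()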
# Approximate current-flow (random-walk) betweenness centrality
BTW = nx.approximate_current_flow_betweenness_centrality(G)
BTW = sorted(BTW.items(), key=itemgetter(1), reverse=True)[:5]
BTW
# PageRank (default damping factor alpha=0.85)
PageRank = nx.pagerank(G)
PageRank = sorted(PageRank.items(), key=itemgetter(1), reverse=True)[:5]
PageRank
# Be careful, very slow...
#Commu = nx.communicability_betweenness_centrality(G)
#Commu = sorted(Commu.items(), key=itemgetter(1), reverse=True)[:5]
#Commu
# Edge betweenness estimated from a sample of k=100 source nodes (faster than the exact computation)
nedge = nx.edge_betweenness_centrality(G, k=100)
nedge = sorted(nedge.items(), key=itemgetter(1), reverse=True)[:5]
nedge
# Keep only the hub airports (degree >= 50)
smallNodes = [n for (n, v) in nx.degree(G) if v < 50]
G.remove_nodes_from(smallNodes)
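# Note: remove_nodes_from mutates G in place; to keep the original graph intact,
# an alternative (hypothetical variable name G_core) would be:
# G_core = G.subgraph(n for n, d in G.degree() if d >= 50).copy()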
# Recompute the sampled edge betweenness on the reduced hub network
# (if fewer than 100 nodes remain, lower k or drop it for the exact computation)
nedge = nx.edge_betweenness_centrality(G, k=100)
nedge = sorted(nedge.items(), key=itemgetter(1), reverse=True)[:5]
nedge
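# With the low-degree airports removed, the top edges highlight the routes that
# carry the most shortest paths between the remaining hub airports.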