Twitch Analysis¶



The core of our project can be found here on Github. It contains the scripts used to gather data and convert them into graphs. It also contains a wide set of data enabling reproducibility of our work.

Before (re-)running or modifying the notebook, be sure to clone the Github repository and unzip images.zip and graphs.zip.
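If you prefer to do the extraction from Python, here is a minimal sketch (assuming the two archives sit next to the notebook):

import zipfile

# Extract both archives in place so the relative paths used below (e.g. ./graphs/...) resolve.
for archive in ("images.zip", "graphs.zip"):
    with zipfile.ZipFile(archive) as zf:
        zf.extractall(".")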

From Data to Graph¶

The idea was to study the Twitch network for a given period of time. We are mainly interested in the communities and the hubs of this network.

Gathering Data¶

To gather data we used two broadly similar approaches.

  • The first one was to use the website TwitchTracker, which provides all the information we wanted. The problem was that the server was slow and sometimes didn't respond.
  • The second one was to use the Twitch API. This is a really convenient API but it limits requests to 100 streams. This is plenty for static analysis over time but could be a hindrance for a precise dynamic analysis.

For the final analysis we decided to gather the data of the top 100 French streamers (and their viewers) every 15 minutes for 1 day.
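To illustrate the second approach, here is a minimal sketch of a single snapshot request against the Twitch Helix API. It is not the exact gathering script of the repository, CLIENT_ID and OAUTH_TOKEN are placeholders for your own Twitch credentials, and the collection of per-channel viewer lists is left out.

import requests

CLIENT_ID = "..."     # placeholder: your Twitch application client id
OAUTH_TOKEN = "..."   # placeholder: an app access token

def top_french_streams():
    """One snapshot of the 100 most-watched French streams."""
    resp = requests.get(
        "https://api.twitch.tv/helix/streams",
        params={"first": 100, "language": "fr"},
        headers={"Client-ID": CLIENT_ID, "Authorization": f"Bearer {OAUTH_TOKEN}"},
    )
    resp.raise_for_status()
    # Each entry contains, among other fields, "user_login", "game_name" and "viewer_count".
    return resp.json()["data"]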

Conversion into Graph¶

The first choice we made was to get rid of the viewers, because keeping them would mean having a bipartite graph, which would make the analysis slightly more difficult. The second point is that viewers are not of great interest with regard to the network structure.

  • This causes a small loss of spatialisation, but at the end of the day the graph is far easier to manipulate.

Since the viewers convey the links between streams (and thus streamers), we decided to add weighted links between streamers, where the weight is the number of common viewers between the two streamers.

To aggregate the data over time, we keep for each streamer the set of viewers that watched that streamer at any point over the period, as sketched below.

  • The bias here is that a viewer who watches a stream for 15 minutes and one who watches it for 2 hours weigh the same.
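As a minimal sketch of this construction (the names are illustrative, not the exact repository script), suppose viewer_sets maps each streamer to the set of viewers aggregated from the 15-minute snapshots; the node attribute "viewers" and the edge attribute "weight" below are the ones used by the analysis cells of this notebook.

import networkx as nx

def build_streamer_graph(viewer_sets):
    """Project the streamer/viewer data onto a weighted streamer graph."""
    G = nx.Graph()
    for streamer, viewers in viewer_sets.items():
        G.add_node(streamer, viewers=len(viewers))
    streamers = list(viewer_sets)
    for i, s1 in enumerate(streamers):
        for s2 in streamers[i + 1:]:
            common = len(viewer_sets[s1] & viewer_sets[s2])
            if common > 0:
                # Weight = number of viewers the two streamers have in common.
                G.add_edge(s1, s2, weight=common)
    return G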

Hence, for the dynamic analysis we propose a slightly different approach. The main idea is to track the evolution of the network with the snapshots we gathered (every 15 minutes for 1 day).

  • The counterpart of taking truncated snapshots (only the top 100) is that the last streamers in the ranking oscillate in and out of the network. This instability is caused by small variations in the number of viewers. It is absorbed in the overall static analysis but not in the dynamic analysis.

Static Analysis¶

Here the analysis focuses on the links between streamers and on the communities.

Library Imports¶

In [1]:
import json
import os
import networkx as nx
import networkx.algorithms.community as nx_comm
import numpy as np
import matplotlib.pyplot as plt
import copy as cp
from itertools import filterfalse

Function Definitions¶

In [2]:
def relevance_filter(G, nodes_size=1000, link_rate=0.05, normalisation=True):
    """Reshape a graph by filtering the streamers that have less than 1000 viewers cumulated,
    and reweight the graph by w = # of viewers in common/min(# of viewers of streamer 1, # of viewers of streamer 2)
    then by filtering edges that have a weight inferior to 0.05. 
    Thus it means that there is a link between two streamers,
    if 5% of the smaller streamer viewers are also viewers of the bigger streamer. """
    G_filtered = cp.deepcopy(G)
    print(f"{len(G.nodes)} nodes")
    print(f"{len(G.edges)} edges")

    print("[INFO] filtering nodes")
    to_dump = filterfalse(lambda n:n[1]>nodes_size, G_filtered.nodes("viewers"))
    nodes_to_dump, _ = zip(*to_dump)
    G_filtered.remove_nodes_from(nodes_to_dump)

    print(f"{len(G.nodes)-len(G_filtered.nodes)} nodes removed")
    print(f"{len(G.edges)-len(G_filtered.edges)} edges removed")

    print("[INFO] filtering edges")
    f = lambda e:e[2]["weight"]/min(G_filtered.nodes[e[0]]["viewers"],G_filtered.nodes[e[1]]["viewers"])>link_rate
    to_dump = cp.deepcopy(list(filterfalse(f, G_filtered.edges.data())))
    G_filtered.remove_edges_from(list(to_dump))
    print(f"{len(G.edges)-len(G_filtered.edges)} edges removed")
    if normalisation:
        for streamer1, streamer2, data in G_filtered.edges.data():
            weight = data['weight']
            viewer_streamer_1 = G_filtered.nodes.data('viewers')[streamer1]
            viewer_streamer_2 = G_filtered.nodes.data('viewers')[streamer2]
            weight_norm = weight/min(viewer_streamer_1, viewer_streamer_2)
            data['weight'] = weight_norm
    return G_filtered

Constant Definitions¶

In [3]:
sub_base = "_1D"
base = "./Streamers_fr" + sub_base + "/"
viewers_file = "viewers.json"
graph_file = "./graphs/G_streamers_one_time_link_1D.graphml"

Statistical Analysis¶

For this analysis we explore the data of a full day, from 9:00 on the 5th of December 2021 to 9:00 on the 6th.

We tried to explore the different characteristics of the graph. As we processed the graph before rendering it, we also explored the influence of this processing on the graph's properties.

In [4]:
G = nx.readwrite.graphml.read_graphml(graph_file)
G_filtered = relevance_filter(G)
753 nodes
206094 edges
[INFO] filtering nodes
520 nodes removed
180225 edges removed
[INFO] filtering edges
204401 edges removed

Degree¶

Without Filtering¶

In [5]:
degree_sequence = sorted([d for n, d in G.degree(weight='weight')], reverse=True)
dmax = max(degree_sequence)

fig = plt.figure("Degree of a random graph", figsize=(8, 8))
# Create a gridspec for adding subplots of different sizes
axgrid = fig.add_gridspec(5, 4)

ax0 = fig.add_subplot(axgrid[0:3, :])
Gcc = G.subgraph(sorted(nx.connected_components(G), key=len, reverse=True)[0])
pos = nx.spring_layout(Gcc, weight='weight', seed=10396953)
nx.draw_networkx_edges(Gcc, pos, ax=ax0, alpha=0.1)
_, viewers_number = zip(*list(Gcc.nodes("viewers")))
scaled_viewers_number = list(map(lambda x:x/300, viewers_number))
nx.draw_networkx_nodes(Gcc, pos, ax=ax0, node_size=scaled_viewers_number)
ax0.set_title("Connected components of G")
ax0.set_axis_off()

ax1 = fig.add_subplot(axgrid[3:, :2])
ax1.plot(degree_sequence, "b-", marker="o")
ax1.set_title("Degree Rank Plot")
ax1.set_ylabel("Degree")
ax1.set_xlabel("Rank")

ax2 = fig.add_subplot(axgrid[3:, 2:])
ax2.hist(degree_sequence, bins=100)
ax2.set_title("Degree histogram")
ax2.set_xlabel("Degree")
ax2.set_ylabel("# of Nodes")

fig.tight_layout()
plt.show()

With Filtering¶

In [6]:
degree_sequence = sorted([d for n, d in G_filtered.degree(weight='weight')], reverse=True)
dmax = max(degree_sequence)

fig = plt.figure(figsize=(8, 8))
# Create a gridspec for adding subplots of different sizes
axgrid = fig.add_gridspec(5, 4)

ax0 = fig.add_subplot(axgrid[0:3, :])
Gcc = G_filtered.subgraph(sorted(nx.connected_components(G_filtered), key=len, reverse=True)[0])
pos = nx.spring_layout(Gcc, weight='weight', seed=10396953)
nx.draw_networkx_edges(Gcc, pos, ax=ax0, alpha=0.1)
_, viewers_number = zip(*list(Gcc.nodes("viewers")))
scaled_viewers_number = list(map(lambda x:x/300, viewers_number))
print(len(scaled_viewers_number))
nx.draw_networkx_nodes(Gcc, pos, ax=ax0, node_size=scaled_viewers_number)
ax0.set_title("Connected components of G")
ax0.set_axis_off()

ax1 = fig.add_subplot(axgrid[3:, :2])
ax1.plot(degree_sequence, "b-", marker="o")
ax1.set_title("Degree Rank Plot")
ax1.set_ylabel("Degree")
ax1.set_xlabel("Rank")

ax2 = fig.add_subplot(axgrid[3:, 2:])
ax2.hist(degree_sequence, bins=50)
ax2.set_title("Degree histogram")
ax2.set_xlabel("Degree")
ax2.set_ylabel("# of Nodes")

fig.tight_layout()
plt.show()
231

Interpretation¶

We observe that the shapes of the degree-rank plot and the degree distribution are preserved by the filtering and normalisation. The processing thus keeps the main structure while making the graph far more readable.

Assortativity¶

Without Filtering¶

In [7]:
a1 = nx.algorithms.assortativity.degree_assortativity_coefficient(G, weight="weight")
a2 = nx.algorithms.assortativity.attribute_assortativity_coefficient(G, "viewers")
a3 = nx.algorithms.assortativity.numeric_assortativity_coefficient(G, "viewers")
a4 = nx.algorithms.assortativity.average_degree_connectivity(G, weight="weight")
In [8]:
print(f"degree_assortativity_coefficient : {a1}\n\
attribute_assortativity_coefficient : {a2}\n\
numeric_assortativity_coefficient : {a3}")

fig = plt.figure()
degrees = sorted(a4.keys())
ax = fig.add_axes([0,0,1,1])
ax.scatter(degrees, [a4[degree] for degree in degrees])
ax.set_title("average_degree_connectivity")
ax.set_xlabel("Degree")
ax.set_ylabel("Connectivity")

ax = fig.add_axes([1.1,0,1,1])
ax.hist(a4, bins=60)
ax.set_title("Connectivity histogram")
ax.set_xlabel("Connectivity")
ax.set_ylabel("# of Nodes")

plt.show()
degree_assortativity_coefficient : -0.014267971271065362
attribute_assortativity_coefficient : -0.0013754749578557966
numeric_assortativity_coefficient : -0.009911478270805477

With Filtering¶

In [9]:
a1 = nx.algorithms.assortativity.degree_assortativity_coefficient(G_filtered, weight="weight")
a2 = nx.algorithms.assortativity.attribute_assortativity_coefficient(G_filtered, "viewers")
a3 = nx.algorithms.assortativity.numeric_assortativity_coefficient(G_filtered, "viewers")
a4 = nx.algorithms.assortativity.average_degree_connectivity(G_filtered, weight="weight")
In [10]:
print(f"degree_assortativity_coefficient : {a1}\n\
attribute_assortativity_coefficient : {a2}\n\
numeric_assortativity_coefficient : {a3}")

fig = plt.figure()
degrees = sorted(a4.keys())
ax = fig.add_axes([0,0,1,1])
ax.scatter(degrees, [a4[degree] for degree in degrees])
ax.set_title("average_degree_connectivity")
ax.set_xlabel("Degree")
ax.set_ylabel("Connectivity")

ax = fig.add_axes([1.1,0,1,1])
ax.hist(a4, bins=20)
ax.set_title("Connectivity histogram")
ax.set_xlabel("Connectivity")
ax.set_ylabel("# of Nodes")

plt.show()
degree_assortativity_coefficient : -0.2936018931033102
attribute_assortativity_coefficient : -0.012213309130847044
numeric_assortativity_coefficient : -0.22417407421479563

Interpretation¶

We observe a loss of connectivity and assortativity: the filtered network is much sparser. As we are more interested in qualitative interpretations, this is not problematic here. Yet, if one wanted to do quantitative calculations, it would be better to keep the original graph.

Betweenness Centrality and Community¶

In [11]:
components = nx.connected_components(G_filtered)
largest_component = max(components, key=len)
H = G_filtered.subgraph(largest_component)

# compute centrality
centrality = nx.betweenness_centrality(H, k=50, weight='weight', endpoints=True)

# compute community structure
lpc = nx.community.label_propagation_communities(H)
community_index = {n: i for i, com in enumerate(lpc) for n in com}

#### draw graph ####
fig, ax = plt.subplots(figsize=(20, 15))
pos = nx.spring_layout(H, k=0.15, seed=4572321)
node_color = [community_index[n] for n in H]
node_size = [v * 20000 for v in centrality.values()]
nx.draw_networkx(
    H,
    pos=pos,
    with_labels=False,
    node_color=node_color,
    node_size=node_size,
    edge_color="gainsboro",
    alpha=0.4,
)

# Title/legend
font = {"color": "k", "fontweight": "bold", "fontsize": 20}
ax.set_title("Gene functional association network (C. elegans)", font)
# Change font color for legend
font["color"] = "r"

ax.text(
    0.80,
    0.10,
    "node color = community structure",
    horizontalalignment="center",
    transform=ax.transAxes,
    fontdict=font,
)
ax.text(
    0.80,
    0.06,
    "node size = betweeness centrality",
    horizontalalignment="center",
    transform=ax.transAxes,
    fontdict=font,
)

# Resize figure for label readability
ax.margins(0.1, 0.05)
fig.tight_layout()
plt.axis("off")
plt.show()
In [12]:
components = nx.connected_components(G_filtered)
largest_component = max(components, key=len)
H = G_filtered.subgraph(largest_component)

# compute centrality
centrality = nx.betweenness_centrality(H, k=50, weight='weight', endpoints=True)

# compute community structure
lpc = nx.community.asyn_fluidc(H,k=25)
community_index = {n: i for i, com in enumerate(lpc) for n in com}

#### draw graph ####
fig, ax = plt.subplots(figsize=(20, 15))
pos = nx.spring_layout(H, k=0.15, seed=4572321)
node_color = [community_index[n] for n in H]
node_size = [v * 20000 for v in centrality.values()]
nx.draw_networkx(
    H,
    pos=pos,
    with_labels=False,
    node_color=node_color,
    node_size=node_size,
    edge_color="gainsboro",
    alpha=0.4,
)

# Title/legend
font = {"color": "k", "fontweight": "bold", "fontsize": 20}
ax.set_title("Gene functional association network (C. elegans)", font)
# Change font color for legend
font["color"] = "r"

ax.text(
    0.80,
    0.10,
    "node color = community structure",
    horizontalalignment="center",
    transform=ax.transAxes,
    fontdict=font,
)
ax.text(
    0.80,
    0.06,
    "node size = betweeness centrality",
    horizontalalignment="center",
    transform=ax.transAxes,
    fontdict=font,
)

# Resize figure for label readability
ax.margins(0.1, 0.05)
fig.tight_layout()
plt.axis("off")
plt.show()
In [13]:
components = nx.connected_components(G_filtered)
largest_component = max(components, key=len)
H = G_filtered.subgraph(largest_component)

# compute centrality
centrality = nx.betweenness_centrality(H, k=50, weight='weight', endpoints=True)

# compute community structure
lpc = nx.community.greedy_modularity_communities(H,weight="weight")
community_index = {n: i for i, com in enumerate(lpc) for n in com}

#### draw graph ####
fig, ax = plt.subplots(figsize=(20, 15))
pos = nx.spring_layout(H, k=0.15, seed=4572321)
node_color = [community_index[n] for n in H]
node_size = [v * 20000 for v in centrality.values()]
nx.draw_networkx(
    H,
    pos=pos,
    with_labels=False,
    node_color=node_color,
    node_size=node_size,
    edge_color="gainsboro",
    alpha=0.4,
)

# Title/legend
font = {"color": "k", "fontweight": "bold", "fontsize": 20}
ax.set_title("Gene functional association network (C. elegans)", font)
# Change font color for legend
font["color"] = "r"

ax.text(
    0.80,
    0.10,
    "node color = community structure",
    horizontalalignment="center",
    transform=ax.transAxes,
    fontdict=font,
)
ax.text(
    0.80,
    0.06,
    "node size = betweeness centrality",
    horizontalalignment="center",
    transform=ax.transAxes,
    fontdict=font,
)

# Resize figure for label readability
ax.margins(0.1, 0.05)
fig.tight_layout()
plt.axis("off")
plt.show()

Interpretation¶

It is interesting to see here that label propagation totally fails to recover the communities compared to the other methods. For this method further filtering could be needed.

The greedy_modularity_communities algorithm retrieves some of the isolated communities, but fails to differentiate in the middle. This could be due to the high connectivity of the communities in this area, the Main Stream Communities.

The asyn_fluidc algorithm performs better, but it requires prior knowledge of the number of communities we want to observe (see the section below).

Further work could be to quantitatively investigate these performances with regard to the labels. We can expect some communities that are not separable (main stream communities) and others that are fragmented (like GTA or Just Chatting).
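As a first step towards that quantitative comparison (not yet label-based), one can compare the modularity reached by each of the three partitions; this is a sketch that assumes H from the cells above is still in scope.

# Compare the three community detection methods by the (weighted) modularity of their partitions.
partitions = {
    "label_propagation": nx.community.label_propagation_communities(H),
    "asyn_fluidc": nx.community.asyn_fluidc(H, k=25, seed=4572321),
    "greedy_modularity": nx.community.greedy_modularity_communities(H, weight="weight"),
}
for name, communities in partitions.items():
    q = nx_comm.modularity(H, list(communities), weight="weight")
    print(f"{name}: modularity = {q:.3f}")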

Graphical Representation¶

Gephi Representation¶

Here we used the topics of the streams, which we gathered along with the viewers, to improve the visualisation. This representation is made on the filtered graph (see the section above).

From 9/12/2021-8h26 to 10/12/2021-8h11

Final Graph

Legend¶

Here is a legend that holds for all the Gephi images: Legend

Week Snapshots¶

Due to a server issue we only have 4 full days.

  • The first graph was made from the data gathered from (Saturday) 11/12/2021-15h37 to 12/12/2021-15h22: First Day
  • The second graph was made from the data gathered from (Sunday) 12/12/2021-15h37 to 13/12/2021-15h22: Second Day
  • The third graph was made from the data gathered from (Monday) 13/12/2021-15h37 to 14/12/2021-15h22: Third Day
  • The fourth graph was made from the data gathered from (Tuesday) 14/12/2021-15h37 to 15/12/2021-15h22: Fourth Day

Interpretation¶

The first thing to notice is that the layout and the filtering/normalisation applied capture the community links.

Then, for the week snapshots, we see that Saturday/Sunday are quite different from Monday/Tuesday from a structural point of view. This was predictable, but it can be refined by digging into each day. In fact we directly observe that Sunday has a lot of big streamers: most people have more free time on Sunday.

For the week dynamic we can see the differences in structure and among the streamers. Some streamers/subjects keep the same shape during the week (the regulars, with a daily frequency), while others (like squeezie and domingo) appear only on certain days (weekly frequency). From heuristic knowledge, most channels have at most a weekly frequency (apart from rare events like Z-event).

From these graphs we can obtain information on the links between the different communities. If we take 4 different games (FIFA, Pokemon, Call-of-Duty, GTA) we observe very different spatialisations. The FIFA and GTA communities are much more isolated than the Call-of-Duty and Pokemon ones. For Pokemon this is representative of the broadness of its player base; for Call-of-Duty this is also the case, but it can be biased by the "big streamers"/hubs integrating the community.

Hour Dynamic¶

We inspect here the "dynamic" at the hour scale. The word "dynamic" refers here to the dynamics of link creation for the graphs represented above, ignoring everything that came before.

Defining Functions¶

In [14]:
def degree_distribution(G):
    L_degree = np.array([])
    for streamer, data in G.degree():
        L_degree = np.append(L_degree, data)
    return(L_degree)

def viewers_distribution(G):
    L_viewers = np.array([])
    for streamer, data in G.nodes.data():
        if len(data) == 2:
            viewer, subject = data['viewers'], data['subject']
            L_viewers = np.append(L_viewers, viewer)
    return(L_viewers)
            
def modularity_communities_games(G):
    """Allows to calculate the communities of a graph based on a selection of games,
    then it is possible to calculate the modularity of this communities."""
    if G == L_graph_2[0]:
        return(0)
    else:
        L_name_games = np.array(['Teamfight Tactics', 'FIFA 22', 'Just Chatting', 'Pokémon Brilliant Diamond/Shining Pearl', 'Fortnite', 'Call of Duty: Warzone', 'VALORANT', 'League of Legends', 'Dead by Daylight', 'Apex Legends', 'Minecraft', 'Grand Theft Auto V', 'Age of Empires IV', 'Rocket League', "Sid Meier's Civilization VI", 'Counter-Strike: Global Offensive', 'Talk Shows & Podcasts'])
        L_community = [[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
        print(L_community[0])
        n = len(L_name_games)
        for streamer, data in G.nodes.data():
            if len(data) == 2:
                viewer, subject = data['viewers'], data['subject']
                if (L_name_games==subject).any() == True: 
                    a = np.where(L_name_games==subject)[0][0]
                    L_community[a].append(streamer)
                else :
                    L_community[n].append(streamer)
            else:
                L_community[n].append(streamer)

        return(nx_comm.modularity(G, L_community))

Loading Graphs¶

In [15]:
L_graph = []
L_graph_2 = []
for i in range (0,24):
    if i < 10:
        name_graph = 'G_streamers_one_time_link_1D_H0'+str(i)+'.graphml'
    else : 
        name_graph = 'G_streamers_one_time_link_1D_H'+str(i)+'.graphml'
    g = nx.read_graphml('./graphs/'+name_graph)
    L_graph.append(g)
    L_graph_2.append(relevance_filter(g))
145 nodes
4500 edges
[INFO] filtering nodes
139 nodes removed
4486 edges removed
[INFO] filtering edges
4500 edges removed
186 nodes
8207 edges
[INFO] filtering nodes
171 nodes removed
8108 edges removed
[INFO] filtering edges
8201 edges removed
215 nodes
11597 edges
[INFO] filtering nodes
191 nodes removed
11340 edges removed
[INFO] filtering edges
11574 edges removed
249 nodes
16202 edges
[INFO] filtering nodes
220 nodes removed
15823 edges removed
[INFO] filtering edges
16157 edges removed
270 nodes
19750 edges
[INFO] filtering nodes
227 nodes removed
18892 edges removed
[INFO] filtering edges
19668 edges removed
304 nodes
25049 edges
[INFO] filtering nodes
250 nodes removed
23684 edges removed
[INFO] filtering edges
24935 edges removed
335 nodes
31121 edges
[INFO] filtering nodes
271 nodes removed
29216 edges removed
[INFO] filtering edges
30949 edges removed
357 nodes
36278 edges
[INFO] filtering nodes
279 nodes removed
33426 edges removed
[INFO] filtering edges
36032 edges removed
382 nodes
42062 edges
[INFO] filtering nodes
288 nodes removed
37944 edges removed
[INFO] filtering edges
41736 edges removed
421 nodes
51223 edges
[INFO] filtering nodes
310 nodes removed
45485 edges removed
[INFO] filtering edges
50810 edges removed
443 nodes
57633 edges
[INFO] filtering nodes
312 nodes removed
49697 edges removed
[INFO] filtering edges
57105 edges removed
475 nodes
66571 edges
[INFO] filtering nodes
320 nodes removed
55512 edges removed
[INFO] filtering edges
65905 edges removed
498 nodes
73621 edges
[INFO] filtering nodes
320 nodes removed
59111 edges removed
[INFO] filtering edges
72740 edges removed
514 nodes
79669 edges
[INFO] filtering nodes
319 nodes removed
62371 edges removed
[INFO] filtering edges
78612 edges removed
541 nodes
88120 edges
[INFO] filtering nodes
338 nodes removed
69304 edges removed
[INFO] filtering edges
86945 edges removed
566 nodes
95426 edges
[INFO] filtering nodes
355 nodes removed
75041 edges removed
[INFO] filtering edges
94123 edges removed
591 nodes
102350 edges
[INFO] filtering nodes
374 nodes removed
80870 edges removed
[INFO] filtering edges
100960 edges removed
608 nodes
109359 edges
[INFO] filtering nodes
384 nodes removed
86426 edges removed
[INFO] filtering edges
107861 edges removed
632 nodes
117758 edges
[INFO] filtering nodes
405 nodes removed
94179 edges removed
[INFO] filtering edges
116215 edges removed
658 nodes
126283 edges
[INFO] filtering nodes
429 nodes removed
102303 edges removed
[INFO] filtering edges
124692 edges removed
682 nodes
134363 edges
[INFO] filtering nodes
453 nodes removed
110368 edges removed
[INFO] filtering edges
132756 edges removed
726 nodes
149461 edges
[INFO] filtering nodes
497 nodes removed
125444 edges removed
[INFO] filtering edges
147828 edges removed
745 nodes
156602 edges
[INFO] filtering nodes
513 nodes removed
131923 edges removed
[INFO] filtering edges
154934 edges removed
753 nodes
160548 edges
[INFO] filtering nodes
520 nodes removed
135644 edges removed
[INFO] filtering edges
158863 edges removed

Analysis¶

In [16]:
T = np.arange(0,24)
L_nb_nodes = []
L_nb_edges = []
L_density = []
L_transitivity = []
L_mean_degree = []
L_max_degree = []
L_diameter = []
L_modularity = []
L_ASPL = []
L_mean_viewer = [] 
L_max_viewer = []
L_nb_nodes_2 = []
L_nb_edges_2 = []
L_density_2 = []
L_transitivity_2 = []
L_mean_degree_2 = []
L_max_degree_2 = []
L_diameter_2 = []
L_modularity_2 = []
L_ASPL_2 = []
L_mean_viewer_2 = [] 
L_max_viewer_2 = []
for i in L_graph:
    print(i)
    L_nb_nodes.append(len(i.nodes()))
    L_nb_edges.append(len(i.edges()))
    L_density.append(nx.density(i))
    L_mean_degree.append(np.mean(degree_distribution(i)))
    L_max_degree.append(np.max(degree_distribution(i)))
    L_diameter.append(nx.diameter(i))
    L_transitivity.append(nx.transitivity(i))
    L_modularity.append(modularity_communities_games(i))
    L_ASPL.append(nx.average_shortest_path_length(i))
    L_mean_viewer.append(np.mean(viewers_distribution(i)))
    L_max_viewer.append(np.max(viewers_distribution(i)))
for i in L_graph_2:
    print(i)
    L_nb_nodes_2.append(len(i.nodes()))
    L_nb_edges_2.append(len(i.edges()))
    L_density_2.append(nx.density(i))
    L_mean_degree_2.append(np.mean(degree_distribution(i)))
    L_max_degree_2.append(np.max(degree_distribution(i)))
    L_diameter_2.append(nx.diameter(i.subgraph(sorted(nx.connected_components(i), key=len, reverse=True)[0])))
    L_transitivity_2.append(nx.transitivity(i))
    L_modularity_2.append(modularity_communities_games(i))
    L_ASPL_2.append(nx.average_shortest_path_length(i.subgraph(sorted(nx.connected_components(i), key=len, reverse=True)[0])))
    L_mean_viewer_2.append(np.mean(viewers_distribution(i)))
    L_max_viewer_2.append(np.max(viewers_distribution(i)))
Graph with 145 nodes and 4500 edges
[]
Graph with 186 nodes and 8207 edges
[]
Graph with 215 nodes and 11597 edges
[]
Graph with 249 nodes and 16202 edges
[]
Graph with 270 nodes and 19750 edges
[]
Graph with 304 nodes and 25049 edges
[]
Graph with 335 nodes and 31121 edges
[]
Graph with 357 nodes and 36278 edges
[]
Graph with 382 nodes and 42062 edges
[]
Graph with 421 nodes and 51223 edges
[]
Graph with 443 nodes and 57633 edges
[]
Graph with 475 nodes and 66571 edges
[]
Graph with 498 nodes and 73621 edges
[]
Graph with 514 nodes and 79669 edges
[]
Graph with 541 nodes and 88120 edges
[]
Graph with 566 nodes and 95426 edges
[]
Graph with 591 nodes and 102350 edges
[]
Graph with 608 nodes and 109359 edges
[]
Graph with 632 nodes and 117758 edges
[]
Graph with 658 nodes and 126283 edges
[]
Graph with 682 nodes and 134363 edges
[]
Graph with 726 nodes and 149461 edges
[]
Graph with 745 nodes and 156602 edges
[]
Graph with 753 nodes and 160548 edges
[]
Graph with 6 nodes and 0 edges
Graph with 15 nodes and 6 edges
[]
Graph with 24 nodes and 23 edges
[]
Graph with 29 nodes and 45 edges
[]
Graph with 43 nodes and 82 edges
[]
Graph with 54 nodes and 114 edges
[]
Graph with 64 nodes and 172 edges
[]
Graph with 78 nodes and 246 edges
[]
Graph with 94 nodes and 326 edges
[]
Graph with 111 nodes and 413 edges
[]
Graph with 131 nodes and 528 edges
[]
Graph with 155 nodes and 666 edges
[]
Graph with 178 nodes and 881 edges
[]
Graph with 195 nodes and 1057 edges
[]
Graph with 203 nodes and 1175 edges
[]
Graph with 211 nodes and 1303 edges
[]
Graph with 217 nodes and 1390 edges
[]
Graph with 224 nodes and 1498 edges
[]
Graph with 227 nodes and 1543 edges
[]
Graph with 229 nodes and 1591 edges
[]
Graph with 229 nodes and 1607 edges
[]
Graph with 229 nodes and 1633 edges
[]
Graph with 232 nodes and 1668 edges
[]
Graph with 233 nodes and 1685 edges
[]

Plot of the Data¶

In [17]:
fig, ax = plt.subplots(3,3, sharex = True, figsize = (12,18), tight_layout=True) #
#Before Normalisation
ax[0,0].plot(T, L_nb_edges, label='Before normalisation')
ax[0,1].plot(T, L_nb_nodes, label='Before normalisation')
ax[0,2].plot(T, L_mean_degree, label='Mean degree before normalisation')
ax[0,2].plot(T, L_max_degree, label='Max degree before normalisation')
ax[1,0].plot(T, L_density, label='Before normalisation')
ax[1,1].plot(T, L_transitivity, label='Before normalisation')
ax[1,2].plot(T, L_modularity, label='Before normalisation')
ax[2,0].plot(T, L_diameter, label='Before normalisation')
ax[2,1].plot(T, L_ASPL, label='Before normalisation')
ax[2,2].plot(T, L_mean_viewer, label='Mean # of viewers before normalisation')
ax[2,2].plot(T, L_max_viewer, label='Max # of viewers before normalisation')
#After normalisation
ax[0,0].plot(T, L_nb_edges_2, label='After normalisation')
ax[0,1].plot(T, L_nb_nodes_2, label='After normalisation')
ax[0,2].plot(T, L_mean_degree_2, label='Mean degree after normalisation')
ax[0,2].plot(T, L_max_degree_2, label='Max degree after normalisation')
ax[1,0].plot(T, L_density_2, label='After normalisation')
ax[1,1].plot(T, L_transitivity_2, label='After normalisation')
ax[1,2].plot(T, L_modularity_2, label='After normalisation')
ax[2,0].plot(T, L_diameter_2, label='After normalisation')
ax[2,1].plot(T, L_ASPL_2, label='After normalisation')
ax[2,2].plot(T, L_mean_viewer_2, label='Mean # of viewers after normalisation')
ax[2,2].plot(T, L_max_viewer_2, label='Max # of viewers after normalisation')
#Graphs 
ax[0,0].set_ylabel('# of edges')
ax[0,1].set_ylabel('# of nodes')
ax[0,2].set_ylabel('Degree')
ax[1,0].set_ylabel('Density')
ax[1,1].set_ylabel('Transitivity')
ax[1,2].set_ylabel('Modularity')
ax[2,0].set_ylabel('Diameter')
ax[2,1].set_ylabel('Average shortest path length')
ax[2,2].set_ylabel('# of viewers')
ax[0,0].set_yscale('log')
ax[0,0].legend()
ax[0,1].legend()
ax[0,2].legend()
ax[1,0].legend()
ax[1,1].legend()
ax[1,2].legend()
ax[2,0].legend()
ax[2,1].legend()
ax[2,2].legend()
ax[2,0].set_xlabel('Time (h)') 
ax[2,1].set_xlabel('Time (h)') 
ax[2,2].set_xlabel('Time (h)') 
plt.show()

Interpretation¶

The main goal here is both to observe the evolution in time of the different metrics that describe the graph and to quantify the impact of our remodeling of the graph.

Thus, we plot the number of nodes, edges and viewers, the degree, the density, the transitivity, the modularity, the diameter and the average shortest path length as a function of time, before and after reshaping the graphs. First, qualitatively, the running time is considerably lower with the reshaped graphs; this comes from the number of edges, which is also considerably lower (by 2 to 3 orders of magnitude). By reducing the number of edges, the density of the graph is also greatly reduced, while the diameter and the average shortest path length are increased. These two metrics each tend to converge towards a constant value. We also see that the transitivity is noticeably lowered by the reshaping.

The modularity, with communities defined by game labels, shows that the reshaping increases the modularity considerably and thus creates more coherent communities.

Dynamical Analysis¶

We propose a short, alternative kind of analysis, made from the viewer's point of view. The viewer data are more likely to vary over time than the streamer data.

The data was modified slightly:

  • We only kept viewers that watch a single stream at a time
    • We are interested in the movement/flux of the viewers
    • We didn't want to handle non-deterministic walks (being in two places at once, for example)
  • We focused on the viewer side
    • The nodes are still streamers
    • The edges are created from walks (changes of channel), as sketched after this list
  • What is there to discover?
    • Viewer classification to illustrate different behaviours
    • Audience analysis
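A minimal sketch of this construction (illustrative names, not the exact repository script): snapshots is a time-ordered list of dicts mapping each viewer to the single channel they were watching in that snapshot; the node attribute "watched_time" and the edge attribute "change" mirror those used in the cells below.

import networkx as nx

def build_walk_graph(snapshots):
    """Directed graph of viewer walks between consecutive snapshots."""
    D = nx.DiGraph()
    # Node attribute: total watched time per channel, counted in snapshot intervals.
    for snap in snapshots:
        for viewer, channel in snap.items():
            if channel not in D:
                D.add_node(channel, watched_time=0)
            D.nodes[channel]["watched_time"] += 1
    # Edge attribute "change": number of observed viewer moves (or stays, as self-loops)
    # from one channel to the next between two consecutive snapshots.
    for previous, current in zip(snapshots, snapshots[1:]):
        for viewer, channel in current.items():
            prev_channel = previous.get(viewer)
            if prev_channel is None:
                continue
            if D.has_edge(prev_channel, channel):
                D[prev_channel][channel]["change"] += 1
            else:
                D.add_edge(prev_channel, channel, change=1)
    return D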

We also propose three heuristics to differentiate viewers:

  • Fidelity $$F_{chan} = \dfrac{watch\_time\_chan}{chan\_stream\_time}$$ Super faithful = watching a channel whenever it is streaming
  • Passion $$P_{com} = \dfrac{watch\_time\_com}{watch\_time}$$ Super passionate = watching only one kind of stream (with an exception for "Just Chatting")
  • Curiosity $$C = \min(1, \alpha\dfrac{N_{chan\_visited}\times interval}{watch\_time})$$ Super curious = watching a different channel each time ($\alpha$ is a normalisation constant).

Defining Constants¶

In [18]:
pre_base = "graphs/"
sub_base = "_1D"
base = "./Streamers_fr" + sub_base + "/"
viewers_file = "viewers.json"

Defining Functions¶

In [19]:
def fidelity(viewer_watch_list, streamer, stream_time): 
    return viewer_watch_list.count(streamer)/stream_time[streamer]
    
def passion(viewer_watch_list, subject, subjects, remove_chatting=True):
    viewer_watch_subjects = list(map(lambda s:subjects[s], viewer_watch_list))
    if remove_chatting:
        return viewer_watch_subjects.count(subject)/(len(viewer_watch_list)-viewer_watch_subjects.count("Just Chatting"))
    return viewer_watch_subjects.count(subject)/len(viewer_watch_list)
    
def curiosity(viewer_watch_list, alpha=1):
    c = alpha*len(set(viewer_watch_list))/len(viewer_watch_list)
    return min(1, c)

def audience_stats(compared_streamers, stream_time, all_time_viewer_watch_lists, subjects):
    data = {streamer:{"fidelity" : [],
                  "passion" : [],
                  "curiosity" : []} for streamer in compared_streamers}

    for viewer, viewer_watch_list in all_time_viewer_watch_lists.items():
        for streamer in compared_streamers:
            if streamer in viewer_watch_list:
                subject = subjects[streamer]
                f = fidelity(viewer_watch_list, streamer, stream_time)
                data[streamer]["fidelity"].append(f)
                if subject == "Just Chatting":
                    p = passion(viewer_watch_list, subject, subjects, remove_chatting=False)
                else:
                    p = passion(viewer_watch_list, subject, subjects)
                data[streamer]["passion"].append(p)
                c = curiosity(viewer_watch_list)
                data[streamer]["curiosity"].append(c)  
    return data

def relevance_filter_directed(G, nodes_size=500, link_rate=5e-4):  
    G_filtered = cp.deepcopy(G)
    print(f"{len(G.nodes)} nodes")
    print(f"{len(G.edges)} edges")

    print("[INFO] filtering nodes")
    to_dump = filterfalse(lambda n:n[1]>nodes_size, G_filtered.nodes("watched_time"))
    nodes_to_dump, _ = zip(*to_dump)
    G_filtered.remove_nodes_from(nodes_to_dump)

    print(f"{len(G.nodes)-len(G_filtered.nodes)} nodes removed")
    print(f"{len(G.edges)-len(G_filtered.edges)} edges removed")

    print("[INFO] filtering edges")
    f = lambda e:e[2]["change"]/G_filtered.nodes[e[1]]["watched_time"]>link_rate
    to_dump = cp.deepcopy(list(filterfalse(f, G_filtered.edges.data())))
    G_filtered.remove_edges_from(list(to_dump))
    print(f"{len(G.edges)-len(G_filtered.edges)} edges removed")
    return G_filtered

Loading Graph and Objects¶

In [20]:
G = nx.readwrite.graphml.read_graphml("graphs/G_streamers_watch_time_link_1D.graphml")
with open("graphs/streamers_watch_time_link_1D_ST", "r") as file:
    stream_time = json.load(file)
with open("graphs/streamers_watch_time_link_1D_WT", "r") as file:
    all_time_viewer_watch_lists = json.load(file)
with open("graphs/streamers_watch_time_link_1D_sub", "r") as file:
    subjects = json.load(file)
    
## This takes time, so only run it if really needed
#streamers = list(set(sum(all_time_viewer_watch_lists.values(), start=[])))

Audience Analysis¶

Comparison Between Streamers¶

In [21]:
compared_streamers = ["domingo", "gotaga", "zerator", "ponce", "dokhy"]
data = audience_stats(compared_streamers, stream_time, all_time_viewer_watch_lists, subjects)
average_data = [
                [np.mean(data[streamer][q]) for streamer in compared_streamers] 
                for q in data[compared_streamers[0]].keys()
               ]
In [22]:
x = np.arange(len(compared_streamers))
width = 0.25
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.set_title("Audience Analysis: Average")
ax.bar(x-width, average_data[0], width = width, align='center', label="Fidelity")
ax.bar(x, average_data[1], width = width, align='center', label="Passion")
ax.bar(x+width, average_data[2], width = width, align='center', label="Curiosity")
ax.set_xticks(x, minor=False) #Sets new labels positions
labels = [streamer+"\n"+subjects[streamer][0][:15] for streamer in compared_streamers]
ax.set_xticklabels(labels, minor=False)
ax.legend()

t = {"fidelity" : 0.2,
     "passion" : 0.6,
     "curiosity" : 0.4} #threshold
threshold_data = [
                [np.sum(np.array(data[streamer][q])>t[q])/len(data[streamer]["fidelity"]) for streamer in compared_streamers] 
                for q in data[compared_streamers[0]].keys()
                 ]

ax = fig.add_axes([1.1,0,1,1])
ax.set_title("Audience Analysis: Threshold Percentages")
ax.bar(x-width, threshold_data[0], width = width, align='center', label="Fidelity")
ax.bar(x, threshold_data[1], width = width, align='center', label="Passion")
ax.bar(x+width, threshold_data[2], width = width, align='center', label="Curiosity")
ax.set_xticks(x, minor=False) #Sets new labels positions
labels = [streamer+"\n V:"+str(len(data[streamer]["fidelity"])) for streamer in compared_streamers]
ax.set_xticklabels(labels, minor=False)
ax.legend()


threshold_data = [
                [np.sum(np.array(data[streamer][q])>t[q]) for streamer in compared_streamers] 
                for q in data[compared_streamers[0]].keys()
                 ]

ax = fig.add_axes([0,-1.2,2,1])
ax.set_title("Audience Analysis: Threshold Numbers")
ax.bar(x-width, threshold_data[0], width = width, align='center', label="Fidelity")
ax.bar(x, threshold_data[1], width = width, align='center', label="Passion")
ax.bar(x+width, threshold_data[2], width = width, align='center', label="Curiosity")
ax.set_xticks(x, minor=False) #Sets new labels positions
labels = [streamer+"\n Viewers:"+str(len(data[streamer]["fidelity"])) for streamer in compared_streamers]
ax.set_xticklabels(labels, minor=False)
ax.legend()
plt.show()

Interpretation¶

These audience statistics give insight into the kind of viewers watching each streamer. The community bond seems stronger for the smallest streamer, "dokhy".

General Analysis¶

In [23]:
N_viewers = 100
viewers = list(all_time_viewer_watch_lists.keys())
np.random.seed(12949302)
part_streamers_list = [all_time_viewer_watch_lists[viewer] for viewer in np.random.choice(viewers,N_viewers,False)]
part_streamers = list(set(sum(part_streamers_list, start=[])))

data = audience_stats(part_streamers, stream_time, all_time_viewer_watch_lists, subjects)

fidelity_red = sum([data[streamer]["fidelity"] for streamer in part_streamers], start=[])
fidelity_mean = np.mean(fidelity_red)
fidelity_std = np.std(fidelity_red)

passion_red = sum([data[streamer]["passion"] for streamer in part_streamers], start=[])
passion_mean = np.mean(passion_red)
passion_std = np.std(passion_red)

curiosity_red = sum([data[streamer]["curiosity"] for streamer in part_streamers], start=[])
curiosity_mean = np.mean(curiosity_red)
curiosity_std = np.std(curiosity_red)
In [24]:
fig = plt.figure()
x=0
ax = fig.add_axes([0,0,1,1])
ax.set_title("Audience Analysis: General Average")
ax.bar(x-width, fidelity_mean, width = width, align='center', label="Fidelity")
ax.bar(x, passion_mean, width = width, align='center', label="Passion")
ax.bar(x+width, curiosity_mean, width = width, align='center', label="Curiosity")
ax.errorbar(x-width, fidelity_mean, yerr=fidelity_std, c='k',capsize=10, label="Standard Deviation")
ax.errorbar(x, passion_mean, yerr=passion_std , c='k',capsize=10)
ax.errorbar(x+width, curiosity_mean, yerr=curiosity_std, c='k',capsize=10)

ax.set_xticks([x-width, x, x+width], minor=False) #Sets new labels positions
labels = data[part_streamers[0]].keys()
ax.set_xticklabels(labels, minor=False)
ax.legend()



ax = fig.add_axes([1.1,0,1,1])
ax.set_title('Audience Analysis: General Box Plot')
quantities = [fidelity_red, passion_red, curiosity_red]
ax.boxplot(quantities, showfliers=False)
ax.set_xticks([1,2,3], labels)
plt.show()

Interpretation¶

Here we can justify, or correct, the choice of the thresholds made before. Indeed, the 3 proposed metrics are not distributed the same way among viewers and streamers, so they should not be compared directly. The audience analysis still holds since we compared each metric individually between the streamers.
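One way to make that correction concrete would be to derive the thresholds from the observed distributions instead of fixing them by hand; this is a sketch under that assumption, reusing the fidelity_red, passion_red and curiosity_red arrays computed above (the upper quartile is an arbitrary choice).

# Per-metric thresholds taken as the upper quartile of the observed distribution,
# so that each metric selects a comparable fraction of viewers.
t_quantile = {
    "fidelity": np.quantile(fidelity_red, 0.75),
    "passion": np.quantile(passion_red, 0.75),
    "curiosity": np.quantile(curiosity_red, 0.75),
}
print(t_quantile)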

Dynamical Graph¶

In [25]:
G_filtered = relevance_filter_directed(G)## Filter to keep relevant nodes for visualisation
563 nodes
24281 edges
[INFO] filtering nodes
173 nodes removed
1968 edges removed
[INFO] filtering edges
16851 edges removed
In [26]:
degree_sequence = sorted([d for n, d in G_filtered.degree(weight='change')], reverse=True)
dmax = max(degree_sequence)

fig = plt.figure("Degree of a random graph", figsize=(8, 8))
# Create a gridspec for adding subplots of different sizes
axgrid = fig.add_gridspec(5, 4)

ax0 = fig.add_subplot(axgrid[0:3, :])
pos = nx.spring_layout(G_filtered, weight="change", seed=10396953)
_, watched_time = zip(*list(G_filtered.nodes("watched_time")))
scaled_watched_time=list(map(lambda x:x/500, watched_time))
nx.draw_networkx_nodes(G_filtered, pos, ax=ax0, node_size=scaled_watched_time)
nx.draw_networkx_edges(G_filtered, pos, ax=ax0, alpha=0.4)
ax0.set_axis_off()

ax1 = fig.add_subplot(axgrid[3:, :2])
ax1.plot(degree_sequence, "b-", marker="o")
ax1.set_title("Degree Rank Plot")
ax1.set_ylabel("Degree")
ax1.set_xlabel("Rank")

ax2 = fig.add_subplot(axgrid[3:, 2:])
ax2.hist(degree_sequence, bins=100)
ax2.set_title("Degree histogram")
ax2.set_xlabel("Degree")
ax2.set_ylabel("# of Nodes")

fig.tight_layout()
plt.show()

Interpretation¶

We didn't extensively try to visualize this graph because the normalisation and the filter would have a different meaning for this new graph. We only explored some parameters in order to visualize the graph here.

Here, some extreme nodes can have a self-loop with a high weight if they streamed for a long time. These temporal behaviours cannot be directly compared to the previous ones.

Further Use¶

The graph is by construction a means to predict the future behaviour of a viewer, knowing only which channel they are watching at a given time. For that, the full (unfiltered) network should be used.

  • The edges give the jump probabilities (see the sketch after this list)
    • Take the edges that point to streaming nodes
    • Normalize the weights ("change") to get a probability
  • It could be improved with the viewer classification (fidelity, passion and curiosity)
    • Each user is associated with a characteristic vector of size 3
    • A machine learning algorithm or a neural network can be used to predict the next channel the user will watch
    • Expected behaviours:
      • Faithful viewers are more likely to stay on the channel
      • Passionate viewers are more likely to jump to a channel of the same community
      • Curious viewers are more likely to jump to any other channel
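A minimal sketch of the jump probabilities described in the first bullet, assuming the unfiltered directed graph G with its "change" edge attribute loaded above; the optional currently_streaming set is a hypothetical argument used to keep only channels that are live at prediction time.

def jump_probabilities(G, channel, currently_streaming=None):
    """Probability distribution over the next channel, given the current one."""
    candidates = [
        (target, data["change"])
        for _, target, data in G.out_edges(channel, data=True)
        if currently_streaming is None or target in currently_streaming
    ]
    total = sum(change for _, change in candidates)
    if total == 0:
        return {}
    # Normalise the "change" counts into a probability distribution.
    return {target: change / total for target, change in candidates}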

This gives the ingredients to implement channel recommendation.