The core of our project can be found here on GitHub. It contains the scripts used to gather data and convert them into graphs, as well as a large set of data enabling the reproducibility of our work.
Before (re)running or modifying the notebook, be sure to clone the GitHub repository and unzip images.zip and graphs.zip.
The idea was to study the Twitch network over a given period of time. We are mainly interested in the communities and the hubs of this network.
To gather data we used two similar approaches.
For the final analysis we decided to gather the data of the top 100 French streamers (and their viewers) every 15 minutes for one day.
The first choice we made was to get rid of the viewers: keeping them would mean having a bipartite graph, which would make the analysis slightly more difficult. Moreover, the viewers themselves are not of great interest with regard to the network structure.
Since the viewers convey the links between streams (and thus streamers), we decided to add weighted links between streamers, where the weight is given by the number of viewers the two streamers have in common.
To aggregate the data, we keep for each streamer the set of viewers who watched that streamer at any point over the period.
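As a minimal sketch of this aggregation (assuming the raw data is available as a dict viewers_by_streamer mapping each streamer to the set of viewers seen over the period; the name and layout are hypothetical):
import networkx as nx
from itertools import combinations

def build_streamer_graph(viewers_by_streamer):
    """Project the streamer-viewer relation onto streamers only:
    nodes carry the cumulated viewer count, edges are weighted by
    the number of viewers two streamers share over the period."""
    G = nx.Graph()
    for streamer, viewers in viewers_by_streamer.items():
        G.add_node(streamer, viewers=len(viewers))
    for (s1, v1), (s2, v2) in combinations(viewers_by_streamer.items(), 2):
        common = len(v1 & v2)  # viewers watching both streamers
        if common > 0:
            G.add_edge(s1, s2, weight=common)
    return G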
For the dynamic analysis we propose a slightly different approach: the main idea is to track the evolution of the network through the snapshots we gathered (every 15 minutes for one day).
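A possible skeleton for this dynamic pass, assuming one graphml file per 15-minute snapshot (the file layout and glob pattern below are assumptions, not the actual repository structure):
import glob
import networkx as nx

def track_snapshots(pattern="./graphs/snapshots/*.graphml"):
    """Load the 15-minute snapshots in chronological order and
    record a simple global metric for each one."""
    history = []
    for path in sorted(glob.glob(pattern)):
        G_t = nx.read_graphml(path)
        history.append((path, G_t.number_of_nodes(), G_t.number_of_edges()))
    return history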
import json
import os
import networkx as nx
import networkx.algorithms.community as nx_comm
import numpy as np
import matplotlib.pyplot as plt
import copy as cp
from itertools import filterfalse
def relevance_filter(G, nodes_size=1000, link_rate=0.05, normalisation=True):
    """Reshape a graph by removing the streamers that have fewer than `nodes_size`
    cumulated viewers, then filtering out the edges whose normalised weight
    w = (# of common viewers) / min(# of viewers of streamer 1, # of viewers of streamer 2)
    is below `link_rate`. With the defaults, two streamers stay linked only if at least
    5% of the smaller streamer's viewers are also viewers of the bigger streamer."""
    G_filtered = cp.deepcopy(G)
    print(f"{len(G.nodes)} nodes")
    print(f"{len(G.edges)} edges")
    print("[INFO] filtering nodes")
    # Drop the nodes whose cumulated viewer count does not exceed `nodes_size`.
    to_dump = filterfalse(lambda n: n[1] > nodes_size, G_filtered.nodes("viewers"))
    nodes_to_dump, _ = zip(*to_dump)
    G_filtered.remove_nodes_from(nodes_to_dump)
    print(f"{len(G.nodes)-len(G_filtered.nodes)} nodes removed")
    print(f"{len(G.edges)-len(G_filtered.edges)} edges removed")
    print("[INFO] filtering edges")
    # Keep an edge only if its overlap coefficient exceeds `link_rate`.
    f = lambda e: e[2]["weight"] / min(G_filtered.nodes[e[0]]["viewers"],
                                       G_filtered.nodes[e[1]]["viewers"]) > link_rate
    to_dump = list(filterfalse(f, G_filtered.edges.data()))
    G_filtered.remove_edges_from(to_dump)
    print(f"{len(G.edges)-len(G_filtered.edges)} edges removed")
    if normalisation:
        # Replace each raw common-viewer count by the overlap coefficient.
        for streamer1, streamer2, data in G_filtered.edges.data():
            viewers_1 = G_filtered.nodes[streamer1]["viewers"]
            viewers_2 = G_filtered.nodes[streamer2]["viewers"]
            data["weight"] = data["weight"] / min(viewers_1, viewers_2)
    return G_filtered
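The reweighting above is the overlap coefficient: the share of the smaller audience that also watches the other stream. A toy example with hypothetical viewer sets:
viewers_a = {"v1", "v2", "v3", "v4"}        # smaller streamer, 4 viewers
viewers_b = {"v3", "v4", "v5", "v6", "v7"}  # bigger streamer, 5 viewers

overlap = len(viewers_a & viewers_b) / min(len(viewers_a), len(viewers_b))
print(overlap)  # 0.5: half of the smaller audience is shared, well above 0.05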
sub_base = "_1D"
base = "./Streamers_fr" + sub_base + "/"
viewers_file = "viewers.json"
graph_file = "./graphs/G_streamers_one_time_link_1D.graphml"
For this analysis we explore the data of a full day, from 9 a.m. on the 5th of December 2021 to 9 a.m. on the 6th.
We explore the different characteristics of the graph. As we processed the graph before rendering it, we also explored the influence of this processing on the graph's properties.
G = nx.readwrite.graphml.read_graphml(graph_file)
G_filtered = relevance_filter(G)
753 nodes
206094 edges
[INFO] filtering nodes
520 nodes removed
180225 edges removed
[INFO] filtering edges
204401 edges removed
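As a quick sanity check, a few global statistics can be printed side by side for the original and the filtered graph (a sketch reusing the G and G_filtered variables defined above):
for name, graph in [("G", G), ("G_filtered", G_filtered)]:
    print(f"{name}: density={nx.density(graph):.4f}, "
          f"components={nx.number_connected_components(graph)}, "
          f"avg clustering={nx.average_clustering(graph):.4f}")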
degree_sequence = sorted([d for n, d in G.degree(weight='weight')], reverse=True)
dmax = max(degree_sequence)
fig = plt.figure("Degree of a random graph", figsize=(8, 8))
# Create a gridspec for adding subplots of different sizes
axgrid = fig.add_gridspec(5, 4)
ax0 = fig.add_subplot(axgrid[0:3, :])
Gcc = G.subgraph(sorted(nx.connected_components(G), key=len, reverse=True)[0])
pos = nx.spring_layout(Gcc, weight='weight', seed=10396953)
nx.draw_networkx_edges(Gcc, pos, ax=ax0, alpha=0.1)
_, viewers_number = zip(*list(Gcc.nodes("viewers")))
scaled_viewers_number = list(map(lambda x:x/300, viewers_number))
nx.draw_networkx_nodes(Gcc, pos, ax=ax0, node_size=scaled_viewers_number)
ax0.set_title("Connected components of G")
ax0.set_axis_off()
ax1 = fig.add_subplot(axgrid[3:, :2])
ax1.plot(degree_sequence, "b-", marker="o")
ax1.set_title("Degree Rank Plot")
ax1.set_ylabel("Degree")
ax1.set_xlabel("Rank")
ax2 = fig.add_subplot(axgrid[3:, 2:])
ax2.hist(degree_sequence, bins=100)
ax2.set_title("Degree histogram")
ax2.set_xlabel("Degree")
ax2.set_ylabel("# of Nodes")
fig.tight_layout()
plt.show()
degree_sequence = sorted([d for n, d in G_filtered.degree(weight='weight')], reverse=True)
dmax = max(degree_sequence)
fig = plt.figure(figsize=(8, 8))
# Create a gridspec for adding subplots of different sizes
axgrid = fig.add_gridspec(5, 4)
ax0 = fig.add_subplot(axgrid[0:3, :])
Gcc = G_filtered.subgraph(sorted(nx.connected_components(G_filtered), key=len, reverse=True)[0])
pos = nx.spring_layout(Gcc, weight='weight', seed=10396953)
nx.draw_networkx_edges(Gcc, pos, ax=ax0, alpha=0.1)
_, viewers_number = zip(*list(Gcc.nodes("viewers")))
scaled_viewers_number = list(map(lambda x:x/300, viewers_number))
print(len(scaled_viewers_number))  # number of nodes in the largest connected component
nx.draw_networkx_nodes(Gcc, pos, ax=ax0, node_size=scaled_viewers_number)
ax0.set_title("Connected components of G")
ax0.set_axis_off()
ax1 = fig.add_subplot(axgrid[3:, :2])
ax1.plot(degree_sequence, "b-", marker="o")
ax1.set_title("Degree Rank Plot")
ax1.set_ylabel("Degree")
ax1.set_xlabel("Rank")
ax2 = fig.add_subplot(axgrid[3:, 2:])
ax2.hist(degree_sequence, bins=50)
ax2.set_title("Degree histogram")
ax2.set_xlabel("Degree")
ax2.set_ylabel("# of Nodes")
fig.tight_layout()
plt.show()
231
We observe that the shapes of the degree-rank plot and of the degree distribution are preserved by the filtering and normalisation: the main structure is kept while the graph becomes far more readable.
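One way to back this visual impression with a number is a two-sample Kolmogorov-Smirnov test on the rescaled degree sequences (a sketch assuming SciPy is available; rescaling to [0, 1] is our choice so that the test compares shapes rather than magnitudes):
import numpy as np
from scipy.stats import ks_2samp

deg_full = np.array([d for _, d in G.degree(weight="weight")], dtype=float)
deg_filt = np.array([d for _, d in G_filtered.degree(weight="weight")], dtype=float)

# Rescale both sequences to [0, 1] so the test compares shapes, not scales.
stat, p_value = ks_2samp(deg_full / deg_full.max(), deg_filt / deg_filt.max())
print(f"KS statistic: {stat:.3f}, p-value: {p_value:.3g}")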
a1 = nx.algorithms.assortativity.degree_assortativity_coefficient(G, weight="weight")
a2 = nx.algorithms.assortativity.attribute_assortativity_coefficient(G, "viewers")
a3 = nx.algorithms.assortativity.numeric_assortativity_coefficient(G, "viewers")
a4 = nx.algorithms.assortativity.average_degree_connectivity(G, weight="weight")
print(f"degree_assortativity_coefficient : {a1}\n\
attribute_assortativity_coefficient : {a2}\n\
numeric_assortativity_coefficient : {a3}")
fig = plt.figure()
degrees = sorted(a4.keys())
ax = fig.add_axes([0,0,1,1])
ax.scatter(degrees, [a4[degree] for degree in degrees])
ax.set_title("average_degree_connectivity")
ax.set_xlabel("Degree")
ax.set_ylabel("Connectivity")
ax = fig.add_axes([1.1,0,1,1])
ax.hist(list(a4.values()), bins=60)  # histogram the connectivity values, not the degree keys
ax.set_title("Connectivity histogram")
ax.set_xlabel("Connectivity")
ax.set_ylabel("# of Nodes")
plt.show()
degree_assortativity_coefficient : -0.014267971271065362
attribute_assortativity_coefficient : -0.0013754749578557966
numeric_assortativity_coefficient : -0.009911478270805477
a1 = nx.algorithms.assortativity.degree_assortativity_coefficient(G_filtered, weight="weight")
a2 = nx.algorithms.assortativity.attribute_assortativity_coefficient(G_filtered, "viewers")
a3 = nx.algorithms.assortativity.numeric_assortativity_coefficient(G_filtered, "viewers")
a4 = nx.algorithms.assortativity.average_degree_connectivity(G_filtered, weight="weight")
print(f"degree_assortativity_coefficient : {a1}\n\
attribute_assortativity_coefficient : {a2}\n\
numeric_assortativity_coefficient : {a3}")
fig = plt.figure()
degrees = sorted(a4.keys())
ax = fig.add_axes([0,0,1,1])
ax.scatter(degrees, [a4[degree] for degree in degrees])
ax.set_title("average_degree_connectivity")
ax.set_xlabel("Degree")
ax.set_ylabel("Connectivity")
ax = fig.add_axes([1.1,0,1,1])
ax.hist(list(a4.values()), bins=20)  # histogram the connectivity values, not the degree keys
ax.set_title("Connectivity histogram")
ax.set_xlabel("Connectivity")
ax.set_ylabel("# of Nodes")
plt.show()
degree_assortativity_coefficient : -0.2936018931033102
attribute_assortativity_coefficient : -0.012213309130847044
numeric_assortativity_coefficient : -0.22417407421479563
We observe a loss of connectivity and assortativity: the filtered network is much sparser. Since we are mainly interested in qualitative interpretations, this is not problematic here. Yet, if one wanted to do quantitative calculations, it would be better to keep the original graph.
components = nx.connected_components(G_filtered)
largest_component = max(components, key=len)
H = G_filtered.subgraph(largest_component)
# compute centrality (approximate betweenness, sampled over k=50 pivot nodes)
centrality = nx.betweenness_centrality(H, k=50, weight='weight', endpoints=True)
# compute community structure
lpc = nx.community.label_propagation_communities(H)
community_index = {n: i for i, com in enumerate(lpc) for n in com}
#### draw graph ####
fig, ax = plt.subplots(figsize=(20, 15))
pos = nx.spring_layout(H, k=0.15, seed=4572321)
node_color = [community_index[n] for n in H]
node_size = [v * 20000 for v in centrality.values()]
nx.draw_networkx(
H,
pos=pos,
with_labels=False,
node_color=node_color,
node_size=node_size,
edge_color="gainsboro",
alpha=0.4,
)
# Title/legend
font = {"color": "k", "fontweight": "bold", "fontsize": 20}
ax.set_title("Gene functional association network (C. elegans)", font)
# Change font color for legend
font["color"] = "r"
ax.text(
0.80,
0.10,
"node color = community structure",
horizontalalignment="center",
transform=ax.transAxes,
fontdict=font,
)
ax.text(
0.80,
0.06,
"node size = betweeness centrality",
horizontalalignment="center",
transform=ax.transAxes,
fontdict=font,
)
# Resize figure for label readability
ax.margins(0.1, 0.05)
fig.tight_layout()
plt.axis("off")
plt.show()
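To read the picture above off as numbers, one can list the detected community sizes and the most central streamers (a sketch reusing the centrality and community_index variables from the cell above):
from collections import Counter

# Community sizes from the membership map built above.
community_sizes = Counter(community_index.values())
print("community sizes:", sorted(community_sizes.values(), reverse=True))

# The ten most central streamers (approximate betweenness).
top = sorted(centrality.items(), key=lambda kv: kv[1], reverse=True)[:10]
for streamer, c in top:
    print(f"{streamer}: {c:.4f}")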