The data set contains observational and wearable sensors data collected in a group of 19 Guinea baboons living in an enclosure of a Primate Center in France, between June 13th 2019 and July 10th 2019.
These data were analyzed and published in the paper V. Gelardi, J. Godard, D. Paleressompoulle, N. Claidière, A. Barrat, “Measuring social networks in primates: wearable sensors vs. direct observations”, Proc. R. Soc. A 476:20190737 (2020).
The file OBS_data.csv contains all the behavioral events registered by an observer, with 8 columns:
DateTime = Time stamp of the event, namely the moment the observed behavior was registered. In case of STATE events (events with duration > 0), it refers to the beginning of the behavior;
Actor = The name of the actor;
Recipient = The name of the individual the Actor is acting upon;
Behavior = The behavior of the Actor. 16 types of behaviors are registered: ‘Resting’, ‘Grooming’, ‘Presenting’, ‘Playing with’, ‘Grunting-Lipsmacking’, ‘Supplanting’, ‘Threatening’, ‘Submission’, ‘Touching’, ‘Avoiding’, ‘Attacking’, ‘Carrying’, ‘Embracing’, ‘Mounting’, ‘Copulating’, ‘Chasing’. In addition, two other categories were included: ‘Invisible’ and ‘Other’;
Category = The classification of the behavior. It can be ‘Affiliative’, ‘Agonistic’, ‘Other’;
Duration = Duration of the observed behavior. POINT events have no duration;
The file RFID_data.csv contains contacts data recorded in the same period by the SocioPatterns infrastructure. The proximity sensors were worn by 13 of the 20 individuals cited above.
The data file consists of 4 columns:
t = time of the beginning of the contact in Epoch format (Unix timestamps);
i = Name of the first individual;
j = Name of the second individual;
DateTime
import pandas as pd
import re
import numpy as np
import time
# Load the observer's event table. The file is read as tab-separated
# even though it carries a .csv extension.
data = pd.read_csv('OBS_data.csv', delimiter='\t')

# Split 'DateTime' on the space into a date part and a time part and store
# them as two distinct columns that replace the original one.
datetime_df = data['DateTime'].str.split(' ', expand=True)
datetime_df.columns = ['date', 'time']
data['Date'] = datetime_df['date']
data['Time'] = datetime_df['time']
data = data.drop(columns=['DateTime'])

# Get rid of the few pathological rows whose recipient is 'EXTERNE' or
# 'SELF'. Rows without a recipient (NaN) are kept, and the remaining rows
# keep their original index labels.
pathological = data['Recipient'].isin(['EXTERNE', 'SELF'])
data = data.drop(index=data.index[pathological])

# Sets of baboon names, categories and behaviors found in the data.
# 'Recipient' can be missing, so missing values are filtered out explicitly:
# set.remove(np.nan) raises KeyError when no NaN is present and only works
# when the stored NaN is the very same object as np.nan.
Actor = {
    name
    for name in list(data['Actor']) + list(data['Recipient'])
    if pd.notna(name)
}
Category = set(data['Category'])
Behavior = set(data['Behavior'])

# Observation dates in chronological order. The dates are "dd/mm/YYYY"
# strings, so they are parsed before being compared (a plain string sort
# would order them by day of the month first).
dates = sorted(set(data['Date']), key=lambda d: time.strptime(d, "%d/%m/%Y"))

# Dictionary mapping each date to the dataframe of that day's events,
# with keys inserted in chronological order.
_grouped_by_date = data.groupby('Date')
data_groupby_date = {date: _grouped_by_date.get_group(date) for date in dates}
data
Actor | Recipient | Behavior | Category | Duration | Point | Date | Time | |
---|---|---|---|---|---|---|---|---|
0 | EWINE | NaN | Invisible | Other | 34 | NO | 13/06/2019 | 09:35 |
1 | EWINE | NaN | Other | Other | 21 | NO | 13/06/2019 | 09:35 |
2 | EWINE | NaN | Invisible | Other | 42 | NO | 13/06/2019 | 09:35 |
3 | EWINE | NaN | Other | Other | 2 | NO | 13/06/2019 | 09:36 |
4 | EWINE | NaN | Invisible | Other | 30 | NO | 13/06/2019 | 09:36 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
5372 | LIPS | FELIPE | Resting | Affiliative | 21 | NO | 10/07/2019 | 11:05 |
5373 | LIPS | NEKKE | Resting | Affiliative | 21 | NO | 10/07/2019 | 11:05 |
5374 | LIPS | FELIPE | Resting | Affiliative | 8 | NO | 10/07/2019 | 11:05 |
5375 | LIPS | NaN | Other | Other | 28 | NO | 10/07/2019 | 11:05 |
5376 | KALI | NaN | Invisible | Other | 301 | NO | 10/07/2019 | 11:06 |
5373 rows × 8 columns
The data we can now work with consist of:
dates: the list of observation dates
data_groupby_date: the dictionary of the observations grouped by date
Actor: the set of the baboons
Behavior: the set of behaviors
Category: the set of categories to which the behaviors belong
# Print the sizes of the three collections built from the observation data:
# the set of baboon names, the per-date dictionary and the set of behaviors.
print('Number of baboons: {}\nNumber of observation days: {}\nNumber of behaviours: {}'.format(len(Actor), len(data_groupby_date), len(Behavior)))
Number of baboons: 20 Number of observation days: 20 Number of behaviours: 18
def interactions(df):
    """Keep only the events that involve two distinct animals.

    Rows with a missing 'Actor' or 'Recipient' are discarded, and so are
    the rows in which the actor and the recipient are the same individual.
    Returns the filtered dataframe; df itself is left untouched.
    """
    # Remove the incomplete rows first, then compare the two name columns.
    paired = df.dropna(subset=['Actor', 'Recipient'])
    distinct = paired['Actor'].ne(paired['Recipient'])
    return paired.loc[distinct]
def interactions_a_on_b(df, a, b):
    """Return the oriented interactions a -> b found in df.

    Parameters:
        df: dataframe of interactions with 'Actor' and 'Recipient' columns.
        a: name of the individual that must be the 'Actor'.
        b: name of the individual that must be the 'Recipient'.

    Returns the rows of df where 'Actor' is a and 'Recipient' is b.
    """
    # The mask has to be computed on the dataframe that was passed in, so
    # that it lines up with the rows being selected.
    mask = (df['Actor'] == a) & (df['Recipient'] == b)
    return df[mask]
# this function is not very necessary because easy but meh, why not
# it can make the code easier to read
def interactions_behavior(df, behavior):
    """Return the rows of df whose 'Behavior' column equals behavior."""
    same_behavior = df['Behavior'].eq(behavior)
    return df.loc[same_behavior]
def interactions_category(df, category):
    """Return the rows of df whose 'Category' column equals category."""
    same_category = df['Category'].eq(category)
    return df.loc[same_category]
def interactions_in_timerange(df, t1, t2):
    """Return the events with a recipient that fall between t1 and t2.

    Both bounds are inclusive. The comparison is made on the 'datetime_obj'
    column of df, and rows whose 'Recipient' is missing are dropped.

    NOTE(review): df must already carry a 'datetime_obj' column; presumably
    it holds datetime objects comparable with t1 and t2 - confirm where it
    is created.
    """
    stamps = df['datetime_obj']
    in_window = (stamps >= t1) & (stamps <= t2)
    return df.loc[in_window].dropna(subset=['Recipient'])
def interactions_to_edges(df):
    """Convert a dataframe of weighted interactions into a list of tuples.

    df is expected to hold one interaction per row in the order
    "actor, recipient, weight". Each row becomes a tuple (u, v, w), with u
    and v the two end nodes of the edge and w its weight, which is the
    format taken by the networkx method "add_weighted_edges_from".
    """
    # Every row of the underlying numpy array is turned into one tuple.
    rows = df.to_numpy()
    return list(map(tuple, rows))
To create the graphs, we use the NetworkX library. Each node of the graph corresponds to a baboon. We connect two nodes with an edge if there is at least one interaction between the two corresponding baboons. Each edge is weighted by the number of interactions between its two nodes.
import networkx as nx
import matplotlib.pyplot as plt
# Default size of the matplotlib figures produced from here on.
plt.rcParams["figure.figsize"] = [9, 5]
# Graph with one node per baboon name and no edge yet.
G = nx.Graph()
G.add_nodes_from(Actor)
# Draw the nodes with their names as labels.
nx.draw(G, with_labels = True)
We implement a first function called get_day_interactions, which returns the graph of the interactions between the baboons over one day. We can choose to select only a given type of behaviour. Another function, called draw_day_interactions, is designed to draw the graph returned by get_day_interactions:
def get_day_interactions(data_groupby_date, date, category, behavior):
    """Build the network of the interactions between pairs of baboons on one day.

    Parameters:
        data_groupby_date: mapping from a date to the dataframe of the
            events observed on that date.
        date: key of data_groupby_date selecting the day.
        category: one of {'Affiliative', 'Agonistic', 'Other'}; any value
            that is not in the global set Category (e.g. 'ALL') disables
            the category filter.
        behavior: one of {'Attacking', 'Avoiding', 'Carrying', 'Chasing',
            'Copulating', 'Embracing', 'Grooming', 'Grunting-Lipsmacking',
            'Invisible', 'Mounting', 'Other', 'Playing with', 'Presenting',
            'Resting', 'Submission', 'Supplanting', 'Threatening',
            'Touching'}; any value that is not in the global set Behavior
            (e.g. 'ALL') disables the behavior filter.

    THE CATEGORY AND THE BEHAVIOUR SHOULD MATCH, otherwise no event passes
    both filters.

    Returns an undirected networkx Graph with one node per name in Actor
    and one edge per pair of baboons that interacted, weighted by the
    number of interactions between the two, whatever their direction.
    """
    from collections import Counter

    data_date = data_groupby_date[date]
    if category in Category:
        data_date = interactions_category(data_date, category)
    if behavior in Behavior:
        data_date = interactions_behavior(data_date, behavior)
    data_date = interactions(data_date)

    # The graph is undirected, so a -> b and b -> a describe the same edge.
    # Sorting each pair merges both directions into a single count instead
    # of letting one direction overwrite the weight of the other.
    pair_counts = Counter(
        tuple(sorted(pair))
        for pair in zip(data_date['Actor'], data_date['Recipient'])
    )

    G = nx.Graph()
    G.add_nodes_from(Actor)
    G.add_weighted_edges_from(
        [(u, v, count) for (u, v), count in pair_counts.items()]
    )
    return G
def draw_day_interactions(data_groupby_date, date, category, behavior):
    """Draw the network of the interactions of one day.

    The graph is built by get_day_interactions and drawn on a circular
    layout, the width of each edge being its weight.

    Parameters:
        data_groupby_date: mapping from a date to the dataframe of the
            events observed on that date.
        date: key of data_groupby_date selecting the day.
        category: one of {'Affiliative', 'Agonistic', 'Other'}, or any
            other value (e.g. 'ALL') to keep every category.
        behavior: one of {'Attacking', 'Avoiding', 'Carrying', 'Chasing',
            'Copulating', 'Embracing', 'Grooming', 'Grunting-Lipsmacking',
            'Invisible', 'Mounting', 'Other', 'Playing with', 'Presenting',
            'Resting', 'Submission', 'Supplanting', 'Threatening',
            'Touching'}, or any other value (e.g. 'ALL') to keep every
            behavior.

    THE CATEGORY AND THE BEHAVIOUR SHOULD MATCH, refer to the ethogram.

    Returns whatever nx.draw returns.
    """
    G = get_day_interactions(data_groupby_date, date, category, behavior)
    pos = nx.circular_layout(G)
    weights = [G[u][v]['weight'] for u, v in G.edges()]

    # The title names exactly the filters that were applied to the graph.
    has_category = category in Category
    has_behavior = behavior in Behavior
    if has_category and has_behavior:
        title = '{} ({}) interactions on the {}'.format(behavior, category, date)
    elif has_category:
        title = '{} interactions on the {}'.format(category, date)
    elif has_behavior:
        title = '{} interactions on the {}'.format(behavior, date)
    else:
        title = 'All types of interactions on the {}'.format(date)
    plt.title(title)
    return nx.draw(G, pos, width=weights, with_labels=True)
Example of a computed graph: graph of all the affiliative interactions between the baboons on the 26/06/2019.
# Affiliative interactions of the day dates[9], with no behavior filter
# requested ('All').
draw_day_interactions(data_groupby_date, dates[9], 'Affiliative', 'All')
Here is another example: the graph of all the agonistic interactions between the baboons on 26/06/2019, which is sparser than the previous one.
# Agonistic interactions of the day dates[9], with no behavior filter
# requested ('All').
draw_day_interactions(data_groupby_date, dates[9], 'Agonistic', 'All')
The same idea can be applied to the cumulative interactions over multiple days:
def get_cummulative_day_interactions(data_groupby_date, date1, date2, category, behavior):
    """Build the network of the cumulative interactions between two dates.

    The interactions of every observation day from date1 to date2, both
    included, are added up. The order of the days is the one of the global
    list dates.

    Parameters:
        data_groupby_date: mapping from a date to the dataframe of the
            events observed on that date.
        date1: first day of the period, an element of dates.
        date2: last day of the period, an element of dates.
        category: one of {'Affiliative', 'Agonistic', 'Other'}; any value
            that is not in the global set Category (e.g. 'ALL') disables
            the category filter.
        behavior: one of {'Attacking', 'Avoiding', 'Carrying', 'Chasing',
            'Copulating', 'Embracing', 'Grooming', 'Grunting-Lipsmacking',
            'Invisible', 'Mounting', 'Other', 'Playing with', 'Presenting',
            'Resting', 'Submission', 'Supplanting', 'Threatening',
            'Touching'}; any value that is not in the global set Behavior
            (e.g. 'ALL') disables the behavior filter.

    THE CATEGORY AND THE BEHAVIOUR SHOULD MATCH, otherwise no event passes
    both filters.

    Returns an undirected networkx Graph with one node per name in Actor
    and one edge per pair of baboons that interacted during the period,
    weighted by their total number of interactions. When date2 comes
    before date1 the graph has no edge.

    Raises ValueError when date1 or date2 is not in dates.
    """
    from collections import Counter

    G = nx.Graph()
    G.add_nodes_from(Actor)

    first = dates.index(date1)
    last = dates.index(date2)

    # A single counter is shared by all the days so that the weights add up
    # over the period instead of being replaced by those of the latest day.
    pair_counts = Counter()
    # The slice starts at date1 (not at the first observation day) and is
    # empty when date2 precedes date1.
    for day in dates[first:last + 1]:
        data_date = data_groupby_date[day]
        if category in Category:
            data_date = interactions_category(data_date, category)
        if behavior in Behavior:
            data_date = interactions_behavior(data_date, behavior)
        data_date = interactions(data_date)
        # The graph is undirected: a -> b and b -> a count for the same edge.
        pair_counts.update(
            tuple(sorted(pair))
            for pair in zip(data_date['Actor'], data_date['Recipient'])
        )

    G.add_weighted_edges_from(
        [(u, v, count) for (u, v), count in pair_counts.items()]
    )
    return G
def draw_cummulative_day_interactions(data_groupby_date, date1, date2, category, behavior):
    """Draw the network of the cumulative interactions between two dates.

    The graph is built by get_cummulative_day_interactions and drawn on a
    circular layout, the width of each edge being its weight.

    Parameters:
        data_groupby_date: mapping from a date to the dataframe of the
            events observed on that date.
        date1: first day of the period.
        date2: last day of the period.
        category: one of {'Affiliative', 'Agonistic', 'Other'}, or any
            other value (e.g. 'ALL') to keep every category.
        behavior: one of {'Attacking', 'Avoiding', 'Carrying', 'Chasing',
            'Copulating', 'Embracing', 'Grooming', 'Grunting-Lipsmacking',
            'Invisible', 'Mounting', 'Other', 'Playing with', 'Presenting',
            'Resting', 'Submission', 'Supplanting', 'Threatening',
            'Touching'}, or any other value (e.g. 'ALL') to keep every
            behavior.

    THE CATEGORY AND THE BEHAVIOUR SHOULD MATCH.

    Returns whatever nx.draw returns.
    """
    G = get_cummulative_day_interactions(data_groupby_date, date1, date2, category, behavior)
    pos = nx.circular_layout(G)
    weights = [G[u][v]['weight'] for u, v in G.edges()]

    # The title names exactly the filters that were applied to the graph.
    has_category = category in Category
    has_behavior = behavior in Behavior
    if has_category and has_behavior:
        title = '{} ({}) interactions between the {} and the {}'.format(
            behavior, category, date1, date2)
    elif has_category:
        title = '{} interactions between the {} and the {}'.format(category, date1, date2)
    elif has_behavior:
        title = '{} interactions between the {} and the {}'.format(behavior, date1, date2)
    else:
        title = 'All types of interactions between the {} and the {}'.format(date1, date2)
    plt.title(title)
    return nx.draw(G, pos, width=weights, with_labels=True)
Here, for instance, we have all the cumulative affiliative interactions between June 13 and June 20.
# Cumulative affiliative interactions from dates[0] to dates[5], with no
# behavior filter requested ('all').
draw_cummulative_day_interactions(data_groupby_date, dates[0], dates[5], 'Affiliative', 'all')
Here we have all the cumulative agonistic interactions between June 13 and June 20. We can notice that there are far fewer agonistic interactions.
# Cumulative agonistic interactions from dates[0] to dates[5], with no
# behavior filter requested ('all').
draw_cummulative_day_interactions(data_groupby_date, dates[0], dates[5], 'Agonistic', 'all')