-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathweighted_directed_configuration_model.py
187 lines (142 loc) · 6.56 KB
/
weighted_directed_configuration_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import numpy as np
import networkx as nx
import random
import math
import bisect
def my_directed_configuration_model(input_G, weight=None,weight_value=None,
create_using=None,seed=None):
"""This function is just a modification of the directed_configuration_model algorithm that can be found in
the networkx module (https://networkx.github.io/documentation/networkx-1.10/reference/generated/networkx.generators.degree_seq.directed_configuration_model.html)
In addition to the networkx one, this one keeps the node name rather than assigning a new sequence of integers
in the moment of the creation of the new network. Furthermore, I introduced a weight parameter that allows to
assign the name of the attribute and the constant value -- weight_value -- that will be assigned to all the
edges of the output network.
"""
in_degree_sequence = input_G.in_degree().values()
out_degree_sequence = input_G.out_degree().values()
if not sum(in_degree_sequence) == sum(out_degree_sequence):
raise nx.NetworkXError('Invalid degree sequences. '
'Sequences must have equal sums.')
if create_using is None:
create_using = nx.MultiDiGraph()
if not seed is None:
random.seed(seed)
nin=len(in_degree_sequence)
nout=len(out_degree_sequence)
# pad in- or out-degree sequence with zeros to match lengths
if nin>nout:
out_degree_sequence.extend((nin-nout)*[0])
else:
in_degree_sequence.extend((nout-nin)*[0])
# create empty graph with same nodes
G=nx.DiGraph()
G.add_nodes_from(input_G.nodes())
N=input_G.order()
if N==0 or max(in_degree_sequence)==0: # done if no edges
return G
# build stublists of available degree-repeated stubs
# e.g. for degree_sequence=[3,2,1,1,1]
# initially, stublist=[1,1,1,2,2,3,4,5]
# i.e., node 1 has degree=3 and is repeated 3 times, etc.
in_stublist=[]
for n in G:
for i in range(input_G.in_degree(n)):
in_stublist.append(n)
out_stublist=[]
for n in G:
for i in range(input_G.out_degree(n)):
out_stublist.append(n)
# shuffle stublists and assign pairs by removing 2 elements at a time
random.shuffle(in_stublist)
random.shuffle(out_stublist)
while in_stublist and out_stublist:
source = out_stublist.pop()
target = in_stublist.pop()
G.add_edge(source,target)
G[source][target][weight]=weight_value
#removing self loops:
G.remove_edges_from(G.selfloop_edges())
G.name="directed configuration_model %d nodes %d edges"%(G.order(),G.size())
return G
def split_chunks(l, k):
""" Splits l in k successive chunks."""
if k < 1:
yield []
raise StopIteration
n = len(l)
avg = n/k
remainders = n % k
start, end = 0, avg
while start < n:
if remainders > 0:
end = end + 1
remainders = remainders - 1
yield l[start:end]
start, end = end, end+avg
def get_log_binned_chunks(edges, weight='weight', n_chunks=100):
weight_list = [e[2][weight] for e in edges]
logmax_weight = int(math.ceil(np.log10(max(weight_list))))
logbins = np.logspace(0,logmax_weight,n_chunks)
#dividing the edge list into chunks [[], [], []]
chunks = [[] for n in range(n_chunks)] #each chunk is a list of edges
for e in edges:
thisw = e[2][weight]
chunk_index = bisect.bisect(logbins,thisw)
chunks[chunk_index].append(e)
#some bins are empty, I remove them
not_empty_chunks = [chunk for chunk in chunks if len(chunk)!=0]
return not_empty_chunks
def my_weighted_directed_configuration_model(G, weight='weight', n_chunks=100, mode='quantiles'):
"""Returns a directed weighted random graph with the same degree sequence of the input graph
and a 'medianised' weight distribution'.
1. The edges of the original graph are initially split into n different chunks, representing
n different classes of weight. Each class defines a sub-graph.
2. For each sub-graph we run the unweighted directed configuration model (based only on the degree
sequence) and then we assign the median of the weights of the initial sub-graph to the edges of the
resulting random graph.
3. The n weighted random graphs are finally reassembled together into one unique output graph.
Args
----
G: NetworkX DiGraph
weight: string (default='weight')
Key for edge data used as the edge weight w_ij.
WARNING: give the definition above, the weight value has to be a nonzero.
n_chunks: int (degault=100)
The number of chunks used to split the edges of the original graph into different
'classes of weight'.
mode: string (default='quantiles')
Keywork for the two modalities of chunk splitting
+ quantiles:
The edges are sorted and then equally split into n_chunks quantiles of same size.
+ logbinning:
The edges are split into n_chunks log-bins. Empty bins are finally removed.
Returns
-------
CM: NetworkX DiGraph
"""
G = G.copy()
#sorting edges according to the weight
edges = []
for a, b, data in sorted(G.edges(data=True), key=lambda (a, b, data): data[weight]):
edges.append((a,b,{weight:data[weight]}))
#splitting the edge list into chunks
if mode=='quantiles': chunks = split_chunks(edges,n_chunks)
elif mode=='logbinning': chunks = get_log_binned_chunks(edges, weight=weight, n_chunks=n_chunks)
CM = nx.DiGraph()
CM.add_nodes_from(G.nodes())
for chunk in chunks:
weights = [e[2][weight] for e in chunk]
median_weight = np.median(weights)
thisG=nx.DiGraph()
for e in chunk:
thisG.add_edge(e[0],e[1])
thisCM = my_directed_configuration_model(thisG, weight=weight,
weight_value=median_weight,
create_using=nx.DiGraph())
for u, v, d in thisCM.edges_iter(data=True):
if CM.has_edge(u,v): CM[u][v][weight] += d[weight]
else:
CM.add_edge(u,v)
CM[u][v][weight]=d[weight]
CM.name="weighted directed configuration_model %d nodes %d edges"%(CM.order(),CM.size())
return CM