Note that you can still remove the last loop altogether by accessing the
property maps as arrays: (tree.a * weight.a).sum()
This innocuous remark leads in fact to a huge speedup: you gain a factor 13,
a lot of surprise! Now, Graph Tool has the best performance among Networkit,
Igraph, Scipy and NetworkX of course. However, to put in perspective, Graph
Tool is only 2 times faster than pure Python code where Kruskal is
implemented with a basic Union-Find, we are far from genuine C/C++
performance, because usually graph implementations are 20 times faster in
C/C++ than pure Python.
Below, the complete benchmark. Note that Scipy code has the potential to
become the faster, the problem with scipy graphs is the need to convert
adjacency lists (or edges list) to Compressed Sparse Row and you have to
write it in Python, this is VERY slow.
# ***********************
from time import clock
from sys import stderr, stdin
import networkit as nk
from networkit.graph import RandomMaximumSpanningForest
from scipy.sparse.csgraph import minimum_spanning_tree
from scipy.sparse import csr_matrix
from igraph import Graph
import graph_tool as gt
from graph_tool.topology import min_spanning_tree
import numpy as np
# ------------ Pure Python Code ---------------------------
def find(s, parents):
ups=[s]
while True:
up=parents[s]
if up ==None:
break
s=up
ups.append(s)
for v in ups[:-1]:
parents[v]=s
return s
def merge(s, t, parents, heights):
rep_s=find(s, parents)
rep_t=find(t, parents)
if rep_s==rep_t:
return False
height_s=heights[rep_s]
height_t=heights[rep_t]
if height_s < height_t:
parents[rep_s]=rep_t
else:
parents[rep_t]=rep_s
if height_s==height_t:
heights[rep_s]+=1
return True
def kruskal_python(edges, n):
edges.sort(key=lambda t:t[2])
parents=[None]*n
heights=[0]*n
k=0
cost=0
cnt=0
while cnt<n-1:
a, b, w=edges[k]
k+=1
if merge(a,b, parents, heights):
cost+=w
cnt+=1
return cost
# ------------ Graph_tool Code ---------------------------
def kruskal_gt_numpy_sum(wedges,n):
g= gt.Graph(directed=False)
weight = g.new_edge_property("long long")
g.add_edge_list(np.array(wedges), eprops=[weight])
tree=min_spanning_tree(g, weights=weight)
return (tree.a * weight.a).sum()
def kruskal_gt(wedges,n):
g= gt.Graph(directed=False)
weight = g.new_edge_property("long long")
g.add_edge_list(np.array(wedges), eprops=[weight])
tree=min_spanning_tree(g, weights=weight)
return sum(b*weight[e] for (e,b) in zip(g.edges(), tree))
# ------------ Igraph Code ---------------------------
def kruskal_igraph(wedges, n):
edges, weights= zip(*[((a,b),w) for (a,b,w) in wedges])
g = Graph(n, edges=list(edges))
g.es["weight"] = weights
tree=g.spanning_tree(weights)
return sum(tree.es["weight"])
# ------------ Scipy Code ---------------------------
def wedges2adj(edges, n):
G=[for _ in range(n)]
for a, b, w in edges:
G[a].append((b,w))
G[b].append((a,w))
return G
def wedges2scr(edges, n):
G=wedges2adj(edges, n)
indptr=[0] # effectifs cumulés
cnt=0
for line in G:
cnt+=len(line)
indptr.append(cnt)
data=
indices=
for i in range(n):
for (j,w) in G[i]:
data.append(w)
indices.append(j)
return [data, indptr, indices]
def csr2wedges(Mcsr, shape):
n, p=shape
k=0
edges=
data, cumul, cols = Mcsr.data,Mcsr.indptr, Mcsr.indices
for i in range(n):
for j in range(cumul[i+1]-cumul[i]):
edges.append((i, cols[k], data[k]))
k+=1
return edges
def kruskal_scipy(edges, n):
data, indptr, indices=wedges2scr(edges, n)
csr=csr_matrix((data, indices, indptr), shape=(n, n))
tree=minimum_spanning_tree(csr)
edges=csr2wedges(tree, (n,n))
return sum(int(round(w)) for (a,b,w) in edges)
# ------------ Networkit Code ---------------------------
def kruskal_networkit(edges, n):
G=nk.Graph(n, weighted=True)
for u, v,w in edges:
G.addEdge(u,v,-w)
msf=RandomMaximumSpanningForest(G)
msf.run()
weights=
msf.getMSF().forEdges(lambda u, v, w, eid : weights.append(-w))
return round(sum(weights))
# ------------ NetworkX Code ---------------------------
def kruskal_networkX(edges, n):
import networkx as nx
G=nx.Graph()
for a, b, w in edges:
G.add_edge(a,b,weight=w)
return sum(d['weight'] for (_,_,d) in
nx.minimum_spanning_tree(G).edges(data=True))
# ------------ Benchmark ---------------------------
def go(solver, L):
global duration
N=len(L)
k=1
while k<N:
edges=
n=L[k]
k+=1
for a in range(n):
d=L[k]
k+=1
for j in range(d):
b=L[k]
k+=1
w=L[k]
k+=1
if a<b:
edges.append([a,b-1,w])
if solver==kruskal_scipy:
data, indptr, indices=wedges2scr(edges, n)
start=clock()
csr=csr_matrix((data, indices, indptr), shape=(n, n))
tree=minimum_spanning_tree(csr)
edges=csr2wedges(tree, (n,n))
duration+=clock()-start
else:
start=clock()
print(solver(edges, n))
duration+=clock()-start
# data
L=list(int(z) for z in stdin.read().split() if z.isdigit())
for solver in [kruskal_networkit, kruskal_gt_numpy_sum,
kruskal_gt,kruskal_python, kruskal_igraph, kruskal_scipy,
kruskal_networkX]:
duration=0
go(solver, L)
print("%s : %.3f" %(solver.__name__, duration), file=stderr)
# ***********************
Here is the code to generate data tests:
# ***********************
from random import randrange
def random_tree(n):
edges=
for a in range(1,n):
b=randrange(a)
edges.append((a,b))
return edges
def random_connected_graph(n, m):
edges={(a,b) if a<b else (b,a) for (a, b) in random_tree(n)}
while len(edges)<m:
a=randrange(n)
b=randrange(n)
t=tuple(sorted([a,b]))
if t!=(a,a) and t not in edges:
edges.add(t)
return [(a,b,randrange(1,10**4)) for (a,b) in edges]
def generate_testfile(ntest, maxi, filename="big.in"):
# test file for Blinnet problem
# www.spoj.com/problems/BLINNET/
testfile=open(filename, "w")
out=["%s" %ntest]
for i in range(ntest):
n=randrange(5,MAXI+1)
m=randrange(n-1,min(MAXI, n*(n-1)//2))
G=random_connected_graph(n, m)
L=[ for _ in range(n)]
for a,b,p in G:
L[a].append((b, p))
L[b].append((a, p))
temp=
temp.append("%s" %n)
for i in range(n):
temp.append("fake%s" %(i+1))
temp.append("%s" %len(L[i]))
for b, p in L[i]:
temp.append("%s %s" %(b+1, p))
out.append('\n'.join(temp))
testfile.write('\n\n'.join(out))
MAXI=30000
ntest=100
filename="big.in"
generate_testfile(ntest, MAXI, filename)
print("Test %s generated" %filename)
# ***********************