Note that you can still remove the last loop altogether by accessing the property maps as arrays: (tree.a * weight.a).sum()
This innocuous remark leads in fact to a huge speedup: you gain a factor 13, a lot of surprise! Now, Graph Tool has the best performance among Networkit, Igraph, Scipy and NetworkX of course. However, to put in perspective, Graph Tool is only 2 times faster than pure Python code where Kruskal is implemented with a basic Union-Find, we are far from genuine C/C++ performance, because usually graph implementations are 20 times faster in C/C++ than pure Python. Below, the complete benchmark. Note that Scipy code has the potential to become the faster, the problem with scipy graphs is the need to convert adjacency lists (or edges list) to Compressed Sparse Row and you have to write it in Python, this is VERY slow. # *********************** from time import clock from sys import stderr, stdin import networkit as nk from networkit.graph import RandomMaximumSpanningForest from scipy.sparse.csgraph import minimum_spanning_tree from scipy.sparse import csr_matrix from igraph import Graph import graph_tool as gt from graph_tool.topology import min_spanning_tree import numpy as np # ------------ Pure Python Code --------------------------- def find(s, parents): ups=[s] while True: up=parents[s] if up ==None: break s=up ups.append(s) for v in ups[:-1]: parents[v]=s return s def merge(s, t, parents, heights): rep_s=find(s, parents) rep_t=find(t, parents) if rep_s==rep_t: return False height_s=heights[rep_s] height_t=heights[rep_t] if height_s < height_t: parents[rep_s]=rep_t else: parents[rep_t]=rep_s if height_s==height_t: heights[rep_s]+=1 return True def kruskal_python(edges, n): edges.sort(key=lambda t:t[2]) parents=[None]*n heights=[0]*n k=0 cost=0 cnt=0 while cnt<n-1: a, b, w=edges[k] k+=1 if merge(a,b, parents, heights): cost+=w cnt+=1 return cost # ------------ Graph_tool Code --------------------------- def kruskal_gt_numpy_sum(wedges,n): g= gt.Graph(directed=False) weight = g.new_edge_property("long long") g.add_edge_list(np.array(wedges), eprops=[weight]) tree=min_spanning_tree(g, weights=weight) return (tree.a * weight.a).sum() def kruskal_gt(wedges,n): g= gt.Graph(directed=False) weight = g.new_edge_property("long long") g.add_edge_list(np.array(wedges), eprops=[weight]) tree=min_spanning_tree(g, weights=weight) return sum(b*weight[e] for (e,b) in zip(g.edges(), tree)) # ------------ Igraph Code --------------------------- def kruskal_igraph(wedges, n): edges, weights= zip(*[((a,b),w) for (a,b,w) in wedges]) g = Graph(n, edges=list(edges)) g.es["weight"] = weights tree=g.spanning_tree(weights) return sum(tree.es["weight"]) # ------------ Scipy Code --------------------------- def wedges2adj(edges, n): G=[[]for _ in range(n)] for a, b, w in edges: G[a].append((b,w)) G[b].append((a,w)) return G def wedges2scr(edges, n): G=wedges2adj(edges, n) indptr=[0] # effectifs cumulés cnt=0 for line in G: cnt+=len(line) indptr.append(cnt) data=[] indices=[] for i in range(n): for (j,w) in G[i]: data.append(w) indices.append(j) return [data, indptr, indices] def csr2wedges(Mcsr, shape): n, p=shape k=0 edges=[] data, cumul, cols = Mcsr.data,Mcsr.indptr, Mcsr.indices for i in range(n): for j in range(cumul[i+1]-cumul[i]): edges.append((i, cols[k], data[k])) k+=1 return edges def kruskal_scipy(edges, n): data, indptr, indices=wedges2scr(edges, n) csr=csr_matrix((data, indices, indptr), shape=(n, n)) tree=minimum_spanning_tree(csr) edges=csr2wedges(tree, (n,n)) return sum(int(round(w)) for (a,b,w) in edges) # ------------ Networkit Code --------------------------- def kruskal_networkit(edges, n): G=nk.Graph(n, weighted=True) for u, v,w in edges: G.addEdge(u,v,-w) msf=RandomMaximumSpanningForest(G) msf.run() weights=[] msf.getMSF().forEdges(lambda u, v, w, eid : weights.append(-w)) return round(sum(weights)) # ------------ NetworkX Code --------------------------- def kruskal_networkX(edges, n): import networkx as nx G=nx.Graph() for a, b, w in edges: G.add_edge(a,b,weight=w) return sum(d['weight'] for (_,_,d) in nx.minimum_spanning_tree(G).edges(data=True)) # ------------ Benchmark --------------------------- def go(solver, L): global duration N=len(L) k=1 while k<N: edges=[] n=L[k] k+=1 for a in range(n): d=L[k] k+=1 for j in range(d): b=L[k] k+=1 w=L[k] k+=1 if a<b: edges.append([a,b-1,w]) if solver==kruskal_scipy: data, indptr, indices=wedges2scr(edges, n) start=clock() csr=csr_matrix((data, indices, indptr), shape=(n, n)) tree=minimum_spanning_tree(csr) edges=csr2wedges(tree, (n,n)) duration+=clock()-start else: start=clock() print(solver(edges, n)) duration+=clock()-start # data L=list(int(z) for z in stdin.read().split() if z.isdigit()) for solver in [kruskal_networkit, kruskal_gt_numpy_sum, kruskal_gt,kruskal_python, kruskal_igraph, kruskal_scipy, kruskal_networkX]: duration=0 go(solver, L) print("%s : %.3f" %(solver.__name__, duration), file=stderr) # *********************** Here is the code to generate data tests: # *********************** from random import randrange def random_tree(n): edges=[] for a in range(1,n): b=randrange(a) edges.append((a,b)) return edges def random_connected_graph(n, m): edges={(a,b) if a<b else (b,a) for (a, b) in random_tree(n)} while len(edges)<m: a=randrange(n) b=randrange(n) t=tuple(sorted([a,b])) if t!=(a,a) and t not in edges: edges.add(t) return [(a,b,randrange(1,10**4)) for (a,b) in edges] def generate_testfile(ntest, maxi, filename="big.in"): # test file for Blinnet problem # www.spoj.com/problems/BLINNET/ testfile=open(filename, "w") out=["%s" %ntest] for i in range(ntest): n=randrange(5,MAXI+1) m=randrange(n-1,min(MAXI, n*(n-1)//2)) G=random_connected_graph(n, m) L=[[] for _ in range(n)] for a,b,p in G: L[a].append((b, p)) L[b].append((a, p)) temp=[] temp.append("%s" %n) for i in range(n): temp.append("fake%s" %(i+1)) temp.append("%s" %len(L[i])) for b, p in L[i]: temp.append("%s %s" %(b+1, p)) out.append('\n'.join(temp)) testfile.write('\n\n'.join(out)) MAXI=30000 ntest=100 filename="big.in" generate_testfile(ntest, MAXI, filename) print("Test %s generated" %filename) # *********************** -- Sent from: http://main-discussion-list-for-the-graph-tool-project.982480.n3.nabble.com/