When using graph-tool together with joblib, do we need to pass graph.copy() to the "Parallel" call, as in the code below, when each job applies vertex filtering with .set_vertex_filter? graph.copy() makes memory usage extreme on large graphs (2M vertices, 4M edges), but in my head it protects against any concurrency problems. (Or is passing 'graph' without '.copy()' OK?)
What is the best way to run parallel graph searches and filtering (a different vertex per thread) with graph-tool and joblib (or without joblib)?
###
# Undirected graph and its float edge property; both are defined and
# filled earlier.
g_graph = graph_tool.Graph(directed=False)
eprop_ang = g_graph.new_edge_property("float")
### from joblib import Parallel, delayed import multiprocessing import os import tempfile import shutil import datetime
# Scratch directory that holds the memory-mapped output file.
path2 = tempfile.mkdtemp()
out_path2 = os.path.join(path2, 'z6path_out2.mmap')

# float32 array with one row per vertex and ``dims`` columns, opened in
# 'w+' mode.
out2 = np.memmap(
    out_path2,
    dtype=np.float32,
    shape=(g_graph.num_vertices(), dims),
    mode='w+',
)

# Settings passed to Parallel(...) as n_jobs and pre_dispatch.
num_cores = 30
num_pre_workers = 60
def runparallel(graph, row, out2):
    """Run one per-vertex job: Dijkstra search, then work on a filtered subgraph.

    Args:
        graph: graph to search. A vertex filter is set on it and cleared
            again before returning, so the object is mutated during the call.
        row: index of the source vertex; also the row of ``out2`` written.
        out2: output array indexed by vertex (an ``np.memmap`` at the
            call site).

    NOTE(review): ``graph_tool.GraphView(graph, vfilt=v_filter)`` would
    presumably give the same filtered view without mutating ``graph`` --
    confirm against the graph-tool documentation before switching.
    """
    # ``eprop_ang`` is the module-level edge property used as the weight.
    dist, pred = graph_tool.search.dijkstra_search(
        graph, graph.vertex(row), weight=eprop_ang
    )
    ## etc etc
    #####

    # Boolean mask: False everywhere, then True for the vertices to keep.
    v_filter = graph.new_vertex_property('bool', val=False)
    for v in SOMETHING_LOCAL:
        v_filter[v] = True
    graph.set_vertex_filter(v_filter)

    # do something with the filtered 'graph' (subgraph)
    # and save output to out2
    # Fix: the original wrote to ``out4``, which is not a parameter and is
    # not defined anywhere in this snippet.
    out2[row] = RESULT
    ##
    graph.clear_filters()
# Dispatch one ``runparallel`` task per vertex index; every task is handed
# its own ``g_graph.copy()``.
# NOTE(review): with a process-based joblib backend the arguments are
# presumably pickled to the workers anyway, so the explicit copy may be
# redundant -- confirm before removing it.
_runner = Parallel(n_jobs=num_cores, pre_dispatch=num_pre_workers, verbose=1)
_runner(
    delayed(runparallel)(g_graph.copy(), r, out2)
    for r in range(g_graph.num_vertices())
)
-- Sent from: http://main-discussion-list-for-the-graph-tool-project.982480.n3.nabble.com/