#!/usr/bin/env python3 # This code contains an implementation of the edge_betweenness # calculation necessary to perform the Girvan Newman algorithm for # graph partitioning. # # Made by Leonardo Tamiano on 04/06/2020. # --------------------------------------------------------- # Data Structures Description # ----------------- # Graph represent by adjacent lists using dictionaries. In particular, we let # G[u] := {nodes v which are adjacent to u} G = { 1: [2, 3, 4], 2: [1, 3, 4], 3: [1, 2], 4: [1, 2] } # --------------------------------------------------------- # I/O with graphviz function # ----------------- # writes graph to filename using grapvhiz format # # TODO: improve efficiency by removing the use of found_edges. def write_graph_to_file(G, filename): found_edges = {} with open(filename, "w") as f: f.write("graph G {\n") for u in G: for v in G[u]: if (u, v) not in found_edges.keys() and (v, u) not in found_edges.keys(): found_edges[(u, v)] = True f.write(f"{u} -- {v} \n") f.write("}\n") # --------------------------------------------------------- # Helper functions # ----------------- # computes the tree obtained by doing a BFS (Breath First Search) # starting from node s in the graph G def bfs(G, s): T = {} border = [s] # continue as long as there are nodes to explore while border != []: u = border.pop(0) T[u] = [] # explore new node for v in G[u]: # add it in the tree only if its the first time I reach this node if v not in T.keys(): T[u].append(v) T[v] = [] border.append(v) return T # computes sets V_1, V_2, ..., V_d, where # V_i := {nodes u which sit at distance i from the root of the tree T} def compute_sets_by_distance(T, r, d, sets): for u in T[r]: if d+1 in sets.keys(): sets[d+1].append(u) else: sets[d+1] = [u] compute_sets_by_distance(T, u, d+1, sets) # returns True if the graph is connected, and False otherwise. def is_graph_connected(G): s = list(G.keys())[0] # execute a BFS starting from s T = bfs(G, s) # return True only if every node has been explored return len(T.keys()) == len(G.keys()) # --------------------------------------------------------- # Main functions # ----------------- # computes the function b_s(u, v), over all edges (u, v), where b_s(u, # v) is defined as the sum over all t in V of the fraction of the # shortest paths that go from s to t which pass over the edge (u, v) # over all the shortest paths that go from s to t def _edge_betwenness_s(G, s): # --------------------------------------------------- # 1) BFS on s T = bfs(G, s) # --------------------------------------------------- # 2) Compute number of shortest paths from s to t # sigma_s[u] := number of shortest paths from s to u # sets[i] := V_i = {nodes at distances i from root of tree T} sigma_s = {} sets = {} compute_sets_by_distance(T, s, 0, sets) # -- base case for u in sets[1]: sigma_s[u] = 1 ## -- general case if len(sets) > 1: for d in range(2, len(sets)+1): for u in sets[d]: sigma_s[u] = 0 for v in sets[d-1]: if v in G[u]: sigma_s[u] += sigma_s[v] # --------------------------------------------------- # 3) Final bottom-up step # flux_node_s[u] := flux that node s sends to node u # flux_edge_s[(u, v)] := flux from node s sends through edge (u, v) flux_node_s = {} flux_edge_s = {} # -- base case # compute flux_node_s for t in sets[len(sets)]: flux_node_s[t] = 1 # compute flux_edge_s if len(sets) == 1: for t in sets[1]: if t in G[s]: flux_edge_s[(s, t)] = 1 else: for v in sets[len(sets)-1]: for t in sets[len(sets)]: if t in G[v]: flux_edge_s[(v, t)] = 1 * sigma_s[v] / sigma_s[t] ## -- general case for j in range(len(sets)-1, 0, -1): # compute flux_node_s for u in sets[j]: flux_node_s[u] = 1 for v in sets[j+1]: if v in G[u]: flux_node_s[u] += flux_edge_s[(u, v)] # compute flux_edge_s if j > 1: for u in sets[j-1]: for v in sets[j]: if v in G[u]: flux_edge_s[(u, v)] = flux_node_s[v] * sigma_s[u] / sigma_s[v] elif j == 1: for v in sets[1]: if v in G[s]: flux_edge_s[(s, v)] = flux_node_s[v] * 1 / sigma_s[v] return flux_edge_s # computes betweenness of edge (u, v) in graph G def edge_betwenness(G, u, v): res = 0 for s in G.keys(): flux_edge_s = _edge_betwenness_s(G, s) if (u, v) in flux_edge_s: res += flux_edge_s[(u, v)] elif (v, u) in flux_edge_s: res += flux_edge_s[(v, u)] return res/2 # partition graph using girvan newman betwenness centrality measure. def girvan_newman(G): connected = is_graph_connected(G) # graph is already disconnected, no need to partition it if not connected: return G # iterate over all edges of the graph to compute the # edge_betweenness betweenness_value = {} for u in G: for v in G[u]: betweenness_value[(u, v)] = edge_betwenness(G, u, v) # start removing edges with highest edge-betweenness until graph # disconnects while connected: # find edge with max betweenness value max_edge = max(betweenness_value, key=betweenness_value.get) u, v = max_edge # remove edge from graph G[u].remove(v) G[v].remove(u) # remove edge from betweennes dictionary betweenness_value.pop(max_edge, None) # check if graph is still connected connected = is_graph_connected(G) return G # --------------------------------------------------------- if __name__ == "__main__": G1 = { 1: [2, 4], 2: [1, 3], 3: [2, 4, 5], 4: [1, 3], 5: [3, 6, 8], 6: [5, 7], 7: [6, 8], 8: [5, 7] } G2 = girvan_newman(G1)