#!/usr/bin/env python3

# This code contains an implementation of the edge_betweenness
# calculation necessary to perform the Girvan Newman algorithm for
# graph partitioning.
# 
# Made by Leonardo Tamiano on 04/06/2020.

# ---------------------------------------------------------
# Data Structures Description
# -----------------

# Graph represent by adjacent lists using dictionaries. In particular, we let
# G[u] := {nodes v which are adjacent to u}

G = {
    1: [2, 3, 4],
    2: [1, 3, 4],
    3: [1, 2],
    4: [1, 2]
}

# ---------------------------------------------------------
# I/O with graphviz function
# -----------------

# writes graph to filename using grapvhiz format
#
# TODO: improve efficiency by removing the use of found_edges.
def write_graph_to_file(G, filename):
    found_edges = {}
    with open(filename, "w") as f:
        f.write("graph G {\n")
        for u in G:
            for v in G[u]:
                if (u, v) not in found_edges.keys() and (v, u) not in found_edges.keys():
                    found_edges[(u, v)] = True
                    f.write(f"{u} -- {v} \n")
        f.write("}\n")

# ---------------------------------------------------------
# Helper functions
# -----------------

# computes the tree obtained by doing a BFS (Breath First Search)
# starting from node s in the graph G
def bfs(G, s):
    T = {}
    border = [s]

    # continue as long as there are nodes to explore
    while border != []:
        u = border.pop(0)
        T[u] = []
        
        # explore new node
        for v in G[u]:
            # add it in the tree only if its the first time I reach this node
            if v not in T.keys():
                T[u].append(v)
                T[v] = []
                border.append(v)
    return T

# computes sets V_1, V_2, ..., V_d, where
# V_i := {nodes u which sit at distance i from the root of the tree T}
def compute_sets_by_distance(T, r, d, sets):
    for u in T[r]:
        if d+1 in sets.keys():
            sets[d+1].append(u)
        else:
            sets[d+1] = [u]
        compute_sets_by_distance(T, u, d+1, sets)

# returns True if the graph is connected, and False otherwise.
def is_graph_connected(G):
    s = list(G.keys())[0]

    # execute a BFS starting from s
    T = bfs(G, s)

    # return True only if every node has been explored
    return len(T.keys()) == len(G.keys())

# ---------------------------------------------------------
# Main functions
# -----------------

# computes the function b_s(u, v), over all edges (u, v), where b_s(u,
# v) is defined as the sum over all t in V of the fraction of the
# shortest paths that go from s to t which pass over the edge (u, v)
# over all the shortest paths that go from s to t
def _edge_betwenness_s(G, s):
    # ---------------------------------------------------
    # 1) BFS on s
    
    T = bfs(G, s)

    # ---------------------------------------------------
    # 2) Compute number of shortest paths from s to t

    # sigma_s[u] := number of shortest paths from s to u
    # sets[i] := V_i = {nodes at distances i from root of tree T}
    
    sigma_s = {} 
    sets = {}    
    compute_sets_by_distance(T, s, 0, sets)

    # -- base case
    for u in sets[1]:
        sigma_s[u] = 1
        
    ## -- general case
    if len(sets) > 1:
        for d in range(2, len(sets)+1):
            for u in sets[d]:
                sigma_s[u] = 0
                for v in sets[d-1]:
                    if v in G[u]:
                        sigma_s[u] += sigma_s[v]

    # ---------------------------------------------------
    # 3) Final bottom-up step

    # flux_node_s[u] := flux that node s sends to node u
    # flux_edge_s[(u, v)] := flux from node s sends through edge (u, v)
    
    flux_node_s = {} 
    flux_edge_s = {} 
        
    # -- base case
    # compute flux_node_s
    for t in sets[len(sets)]:
        flux_node_s[t] = 1

    # compute flux_edge_s
    if len(sets) == 1:
        for t in sets[1]:
            if t in G[s]:
                flux_edge_s[(s, t)] = 1
    else:
        for v in sets[len(sets)-1]:
            for t in sets[len(sets)]:
                if t in G[v]:
                    flux_edge_s[(v, t)] = 1 * sigma_s[v] / sigma_s[t]

    ## -- general case
    for j in range(len(sets)-1, 0, -1):
        
        # compute flux_node_s
        for u in sets[j]:
            flux_node_s[u] = 1
            for v in sets[j+1]:
                if v in G[u]:
                    flux_node_s[u] += flux_edge_s[(u, v)]

        # compute flux_edge_s
        if j > 1:
            for u in sets[j-1]:
                for v in sets[j]:
                    if v in G[u]:
                        flux_edge_s[(u, v)] = flux_node_s[v] * sigma_s[u] / sigma_s[v]
        elif j == 1:
            for v in sets[1]:
                if v in G[s]:
                    flux_edge_s[(s, v)] = flux_node_s[v] * 1 / sigma_s[v]

    return flux_edge_s

# computes betweenness of edge (u, v) in graph G
def edge_betwenness(G, u, v):
    res = 0
    for s in G.keys():
        flux_edge_s = _edge_betwenness_s(G, s)
        if (u, v) in flux_edge_s:
            res += flux_edge_s[(u, v)]
        elif (v, u) in flux_edge_s:
            res += flux_edge_s[(v, u)]
        
    return res/2


# partition graph using girvan newman betwenness centrality measure.
def girvan_newman(G):
    connected = is_graph_connected(G)

    # graph is already disconnected, no need to partition it    
    if not connected:
        return G
        
    # iterate over all edges of the graph to compute the
    # edge_betweenness
    betweenness_value = {}
    for u in G:
        for v in G[u]:
            betweenness_value[(u, v)] = edge_betwenness(G, u, v)

    # start removing edges with highest edge-betweenness until graph
    # disconnects
    while connected:
        # find edge with max betweenness value
        max_edge = max(betweenness_value, key=betweenness_value.get)
        u, v = max_edge
        
        # remove edge from graph
        G[u].remove(v)
        G[v].remove(u)
        
        # remove edge from betweennes dictionary
        betweenness_value.pop(max_edge, None)
        
        # check if graph is still connected
        connected = is_graph_connected(G)

    return G
    
# ---------------------------------------------------------

if __name__ == "__main__":
    G1 =  {
        1: [2, 4],
        2: [1, 3],
        3: [2, 4, 5],
        4: [1, 3],
        5: [3, 6, 8],
        6: [5, 7],
        7: [6, 8],
        8: [5, 7]
    }

    G2 = girvan_newman(G1)