# This file contains a bunch of code that computes the inter-annotator
# agreement using the annotations made during lesson. Various methods
# are used, some only work for 2 annotators, and some for multiple
# annotators. In any case, related documentation to the wikipedia page
# describing the methods is reported.
#
# Here follows some data found by executing the script. Note that the
# various IDs used correspond to the sheetID.
#
# Made by Leonardo Tamiano on 24/10/19.

# ------------------ Imports ---------------------------------------

import gspread
from oauth2client.service_account import ServiceAccountCredentials
from pprint import pprint
from itertools import combinations
import numpy as np

# OAuth scopes required to read the annotation spreadsheet.
scope = ["https://spreadsheets.google.com/feeds",
         'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file",
         "https://www.googleapis.com/auth/drive"]

# NOTE(review): this authenticates against Google at import time and
# needs a local "creds.json"; importing the module without credentials
# or network access will fail.
creds = ServiceAccountCredentials.from_json_keyfile_name("creds.json", scope)
client = gspread.authorize(creds)
sh = client.open("Annotazione")

# Universal Dependencies POS tags used as the annotation classes.
ANNOTATION_CLASSES = ["ADJ", "ADP", "ADV", "AUX", "CCONJ", "DET", "INTJ",
                      "NOUN", "NUM", "PART", "PRON", "PROPN", "PUNCT",
                      "SCONJ", "SYM", "VERB", "X"]

# Data taken from google sheet. ROWS[a][i] is the class that annotator
# a assigned to the i-th token; an empty string means "not annotated".
ROWS = [
    [u'ADV', u'VERB', u'ADV', u'ADJ', u'SCONJ', u'DET', u'NOUN', u'DET', u'NOUN', u'ADP',
     u'NOUN', u'ADJ', u'ADP', u'PROPN', u'PROPN', u'PROPN', u'VERB', u'ADJ', u'SYM', u'CCONJ',
     u'SCONJ', u'VERB', u'DET', u'ADJ', u'NOUN', u'SCONJ', u'VERB', u'AUX', u'ADJ', u'ADP',
     u'NOUN', u'SYM', u'PUNCT', u'ADV', u'SCONJ', u'VERB', u'ADJ', u'VERB', u'SCONJ', u'ADV',
     u'ADJ', u'NOUN', u'ADP', u'ADJ', u'NOUN', u'CCONJ', u'DET', u'NOUN', u'ADV', u'VERB',
     u'ADJ', u'DET', u'NOUN', u'CCONJ', u'DET', u'NOUN', u'PROPN', u'PART', u'PROPN', u'PUNCT'],
    [u'part', u'aux', u'adv', u'adj', u'sconj', u'det', u'noun', u'adp', u'noun', u'adp',
     u'noun', u'adj', u'adp', u'propn', u'propn', u'propn', u'aux', u'adj', u'sym', u'cconj',
     u'cconj', u'verb', u'det', u'adj', u'noun', u'sconj', u'verb', u'verb', u'verb', u'adp',
     u'propn', u'sym', u'sym', u'sconj', u'sconj', u'verb', u'adj', u'verb', u'adp', u'adp',
     u'adj', u'noun', u'', u'adj', u'noun', u'cconj', u'adp', u'noun', u'adv', u'aux',
     u'adj', u'adp', u'noun', u'cconj', u'adp', u'propn', u'propn', u'propn', u'propn', u'sym'],
    [u'CCONJ', u'VERB', u'ADV', u'ADJ', u'SCONJ', u'DET', u'NOUN', u'ADP', u'NOUN', u'ADP',
     u'NOUN', u'ADJ', u'ADP', u'PROPN', u'PROPN', u'PROPN', u'AUX', u'VERB', u'SYM', u'SCONJ',
     u'CCONJ', u'VERB', u'DET', u'ADJ', u'NOUN', u'SCONJ', u'AUX', u'VERB', u'ADJ', u'ADP',
     u'PROPN', u'SYM', u'PUNCT', u'ADJ', u'ADV', u'AUX', u'VERB', u'VERB', u'PRON', u'ADV',
     u'ADJ', u'NOUN', u'ADP', u'ADJ', u'NOUN', u'ADP', u'DET', u'NOUN', u'ADV', u'AUX',
     u'VERB', u'DET', u'NOUN', u'ADP', u'DET', u'NOUN', u'PROPN', u'PROPN', u'PROPN', u'PUNCT'],
    [u'ADV', u'VERB', u'ADV', u'ADJ', u'CCONJ', u'DET', u'NOUN', u'ADP', u'NOUN', u'CCONJ',
     u'NOUN', u'ADJ', u'ADP', u'PROPN', u'PROPN', u'PROPN', u'VERB', u'ADJ', u'PUNCT', u'ADV',
     u'CCONJ', u'VERB', u'DET', u'ADJ', u'NOUN', u'CCONJ', u'VERB', u'VERB', u'VERB', u'ADP',
     u'PROPN', u'PUNCT', u'PUNCT', u'PRON', u'CCONJ', u'VERB', u'ADJ', u'VERB', u'CCONJ', u'ADJ',
     u'ADJ', u'NOUN', u'ADP', u'ADJ', u'NOUN', u'ADP', u'DET', u'NOUN', u'ADV', u'VERB',
     u'ADJ', u'DET', u'NOUN', u'ADP', u'DET', u'PROPN', u'PROPN', u'PROPN', u'PROPN', u'PUNCT'],
    [u'ADV', u'VERB', u'ADV', u'ADJ', u'SCONJ', u'DET', u'NOUN', u'ADP', u'NOUN', u'ADP',
     u'NOUN', u'ADJ', u'ADP', u'PROPN', u'PROPN', u'PROPN', u'VERB', u'ADJ', u'SYM', u'SCONJ',
     u'SCONJ', u'VERB', u'DET', u'ADJ', u'NOUN', u'SCONJ', u'AUX', u'AUX', u'VERB', u'ADP',
     u'NOUN', u'SYM', u'PUNCT', u'PRON', u'SCONJ', u'VERB', u'ADJ', u'VERB', u'SCONJ', u'ADV',
     u'PRON', u'NOUN', u'ADP', u'ADJ', u'NOUN', u'ADP', u'DET', u'NOUN', u'NOUN', u'VERB',
     u'ADJ', u'DET', u'NOUN', u'ADP', u'DET', u'NOUN', u'PROPN', u'PROPN', u'PROPN'],
    [u'AUX', u'VERB', u'ADJ', u'NOUN', u'SCONJ', u'DET', u'NOUN', u'ADP', u'NOUN', u'ADP',
     u'NOUN', u'ADJ', u'ADP', u'PROPN', u'PROPN', u'PROPN', u'VERB', u'NOUN', u'PUNCT', u'SCONJ',
     u'SCONJ', u'VERB', u'DET', u'ADJ', u'NOUN', u'ADV', u'AUX', u'VERB', u'VERB', u'ADP',
     u'PROPN', u'PUNCT', u'PUNCT', u'DET', u'SCONJ', u'VERB', u'ADV', u'VERB', u'SCONJ', u'ADV',
     u'PRON', u'NOUN', u'ADP', u'ADV', u'NOUN', u'ADP', u'DET', u'NOUN', u'ADV', u'VERB',
     u'NOUN', u'DET', u'NOUN', u'ADP', u'DET', u'NOUN', u'PROPN', u'PROPN', u'PROPN', u'PUNCT'],
    [u'ADV', u'AUX', u'ADJ', u'NOUN', u'SCONJ', u'DET', u'NOUN', u'ADP', u'NOUN', u'ADP',
     u'NOUN', u'ADJ', u'ADP', u'PROPN', u'PROPN', u'PROPN', u'AUX', u'NOUN', u'PUNCT', u'CCONJ',
     u'SCONJ', u'VERB', u'DET', u'ADJ', u'NOUN', u'SCONJ', u'AUX', u'VERB', u'ADV', u'ADP',
     u'PROPN', u'PUNCT', u'PUNCT', u'PRON', u'SCONJ', u'AUX', u'ADV', u'AUX', u'SCONJ', u'ADV',
     u'ADV', u'NOUN', u'ADP', u'ADJ', u'NOUN', u'ADP', u'DET', u'NOUN', u'ADV', u'AUX',
     u'NOUN', u'DET', u'NOUN', u'ADP', u'DET', u'PROPN', u'PROPN', u'PART', u'PROPN', u'PUNCT']
]

# ------------------ Agreement for 2 annotators ---------------------------------------


def basic_agreement_from_spreadsheet(i, j, agreement_function):
    """Compute the pairwise agreement between annotators i and j.

    i, j are 1-based sheet numbers; the annotations are read live from
    rows 2 and 4 of the corresponding worksheets of the "Annotazione"
    google spreadsheet. agreement_function is one of cohen_kappa_hp or
    scott_pi_hp.
    """
    wsheet1 = sh.get_worksheet(i - 1)
    row11 = wsheet1.row_values(2)
    row12 = wsheet1.row_values(4)

    wsheet2 = sh.get_worksheet(j - 1)
    row21 = wsheet2.row_values(2)
    row22 = wsheet2.row_values(4)

    return basic_agreement(row11 + row12, row21 + row22,
                           ANNOTATION_CLASSES, agreement_function)


def basic_agreement(row1, row2, classes, agreement_function):
    """Chance-corrected agreement between two annotators.

    row1, row2 are the two annotators' label sequences (case-insensitive,
    empty string = not annotated); classes is the list of admissible
    labels (uppercase). agreement_function computes the hypothetical
    probability pe of chance agreement; the returned value is the
    generic kappa-style coefficient (po - pe) / (1 - pe).

    Raises ValueError if no token was annotated by both annotators.
    """
    # annotator statistics: for each annotator and for each class we
    # count the number of times that annotator has chosen that class.
    antr_1_stats = {c: 0 for c in classes}
    antr_2_stats = {c: 0 for c in classes}

    accuracy = 0  # tokens on which the two annotators agree
    terms = 0     # tokens annotated by both

    # zip stops at the shorter row, matching the original
    # min(len(row1), len(row2)) bound.
    for annotation_1, annotation_2 in zip(row1, row2):
        annotation_1 = annotation_1.upper()
        annotation_2 = annotation_2.upper()

        # check if they both have annotated that word. If not, do not
        # consider it.
        if annotation_1 != "" and annotation_2 != "":
            if annotation_1 == annotation_2:
                accuracy += 1
            antr_1_stats[annotation_1] += 1
            antr_2_stats[annotation_2] += 1
            terms += 1

    # guard against the (previously crashing) empty-overlap case
    if terms == 0:
        raise ValueError("no token was annotated by both annotators")

    # compute relative observed agreement
    accuracy = accuracy / float(terms)

    # hypothetical probability of chance agreement
    pe = agreement_function(antr_1_stats, antr_2_stats, classes, terms)

    return (accuracy - pe) / (1 - pe)


def cohen_kappa_hp(antr_1_stats, antr_2_stats, classes, terms):
    """Hypothetical probability of chance agreement described in
    Cohen's kappa method (https://en.wikipedia.org/wiki/Cohen%27s_kappa):
    pe = sum_c (n1_c / N) * (n2_c / N).
    """
    pe = 0
    for c in classes:
        pe += antr_1_stats[c] * antr_2_stats[c]
    return pe / float(terms ** 2)


def scott_pi_hp(antr_1_stats, antr_2_stats, classes, terms):
    """Hypothetical probability of chance agreement described in
    Scott's pi method (https://en.wikipedia.org/wiki/Scott%27s_Pi):
    pe = sum_c ((n1_c + n2_c) / (2N))^2.

    BUGFIX: the joint proportion is (n1_c + n2_c) / (2 * N); the old
    code divided by N only, inflating pe by a factor of 4 per class.
    """
    pe = 0
    for c in classes:
        pe += ((antr_1_stats[c] + antr_2_stats[c]) / (2.0 * terms)) ** 2
    return pe


# ------------------ Agreement for multiple annotators ---------------------------------------


def agreement_from_spreadsheet():
    """Compute Fleiss' kappa over the 7 annotators, reading rows 2 and 4
    of each worksheet of the "Annotazione" google spreadsheet."""
    rows = []
    for i in range(0, 7):
        wsheet = sh.get_worksheet(i)
        rows.append(wsheet.row_values(2) + wsheet.row_values(4))
    return fleiss_kappa(rows, ANNOTATION_CLASSES)


def fleiss_kappa(rows, classes):
    """Implementation of ideas found in
    https://www.wikiwand.com/en/Fleiss%27_kappa

    rows[a] is the label sequence of annotator a; classes is the list of
    admissible labels (uppercase). Returns the Fleiss' kappa agreement.

    NOTE(review): the formula assumes every subject received exactly
    n = len(rows) ratings; tokens skipped by some annotator (empty
    string or short row) make the result approximate.
    """
    # how many annotators do we have?
    n = len(rows)
    # how many subjects do we have at most?
    N_max = len(rows[0])
    # how many classes do we have?
    k = len(classes)

    # Map each class label to a column index. Built from the `classes`
    # parameter (the old code hardcoded a dict that duplicated
    # ANNOTATION_CLASSES and silently ignored `classes`).
    classes_to_num = {c: idx for idx, c in enumerate(classes)}

    # m[i][j] := number of raters who assigned the i-th subject
    # to the j-th category
    m = np.zeros((N_max, k))
    for i in range(0, N_max):
        for a in range(0, n):
            if i < len(rows[a]) and rows[a][i] != '':
                m[i][classes_to_num[rows[a][i].upper()]] += 1

    # P[i] = extent to which raters agree for the i-th subject
    P = [0] * N_max
    for i in range(0, N_max):
        for j in range(0, k):
            P[i] += m[i][j] ** 2
        P[i] = (P[i] - n) / float(n * (n - 1))

    # P_mean = mean of the P[i]'s
    P_mean = sum(P) / float(N_max)

    # p[j] = proportion of all assignments which were to the j-th category
    p = [0] * k
    for j in range(0, k):
        for i in range(0, N_max):
            p[j] += m[i][j]
        p[j] = p[j] / float(N_max * n)

    # PE as defined in the wikipedia page
    PE = 0
    for j in range(0, k):
        PE += p[j] ** 2

    # final agreement
    return (P_mean - PE) / float(1 - PE)


# ------------------ Testing area ---------------------------------------

if __name__ == "__main__":
    # print pairwise coefficients for the locally saved data
    my_list = [1, 2, 3, 4, 5, 6, 7]
    function = cohen_kappa_hp

    # BUGFIX: the printed label is derived from the selected method
    # (the old code always printed "Scott's pi" even though
    # cohen_kappa_hp was being used).
    coefficient_name = ("Cohen's kappa" if function is cohen_kappa_hp
                        else "Scott's pi")

    for pair in combinations(my_list, 2):
        i, j = pair
        value = basic_agreement(ROWS[i - 1], ROWS[j - 1],
                                ANNOTATION_CLASSES, function)
        print(coefficient_name + " coefficient for " + str(i) + ", " +
              str(j) + " is: " + str(value))

    print(fleiss_kappa(ROWS, ANNOTATION_CLASSES))