from typing import Dict, Tuple
from pathlib import Path
from glob import glob
from itertools import combinations

from ner_utils import read_proofreaded_bsf_data


def calculate_iaa(file1: Path, file2: Path) -> tuple[float, int, int]:
    """Compute inter-annotator agreement between two annotation files.

    Every item yielded by ``read_proofreaded_bsf_data`` is reduced to a
    ``(tag, start_idx, end_idx)`` triple; two entities agree only when all
    three components are equal. The second file is used as the reference
    and the first one as the prediction.

    Args:
        file1: Annotation file produced by the first annotator.
        file2: Annotation file produced by the second annotator.

    Returns:
        A ``(f1_score, true_positives, annotator1_total)`` tuple where
        ``true_positives`` is the number of triples present in both files
        and ``annotator1_total`` is the number of distinct triples found in
        ``file1``. ``f1_score`` is 0.0 when there is no overlap at all.
    """
    annotator1_entities = {
        (x.tag, x.start_idx, x.end_idx)
        for x in read_proofreaded_bsf_data(Path(file1))
    }
    annotator2_entities = {
        (x.tag, x.start_idx, x.end_idx)
        for x in read_proofreaded_bsf_data(Path(file2))
    }

    true_positives = len(annotator1_entities & annotator2_entities)
    # Entities only annotator 1 marked: false positives w.r.t. annotator 2.
    annotator1_only = len(annotator1_entities - annotator2_entities)
    # Entities only annotator 2 marked: annotator 1 missed them, so they are
    # the false negatives. Reusing annotator1_only here would force recall to
    # equal precision and make the F1 score asymmetric in its arguments.
    annotator2_only = len(annotator2_entities - annotator1_entities)

    annotator1_total = true_positives + annotator1_only
    annotator2_total = true_positives + annotator2_only

    precision = true_positives / annotator1_total if annotator1_total else 0.0
    recall = true_positives / annotator2_total if annotator2_total else 0.0
    f1_score = (
        2 * (precision * recall) / (precision + recall)
        if precision + recall > 0
        else 0.0
    )

    return f1_score, true_positives, annotator1_total


def main() -> None:
    """Compare every pair of ``.ann`` files per folder and print a summary."""
    f_ones: Dict[Tuple[Path, Path], float] = {}
    tps, alls = 0, 0

    # NOTE(review): glob() is called without recursive=True, so "**" behaves
    # like a single "*" and only folders exactly two levels below "seva" are
    # visited -- confirm that this is the intended layout.
    for folder in glob("../databank/seva/**/*/"):
        files = list(Path(folder).glob("*.ann"))
        for file1, file2 in combinations(files, 2):
            f1, tp, all_ = calculate_iaa(file1, file2)
            f_ones[(file1, file2)] = f1
            tps += tp
            alls += all_

    print("Number of comparisons:", len(f_ones))
    print("Total true positives:", tps)
    print("Total all:", alls)
    # Guard the averages so that an empty data set reports 0.0 instead of
    # crashing with ZeroDivisionError.
    print("Average precision:", tps / alls if alls else 0.0)
    print(
        "Average F1-score:",
        sum(f_ones.values()) / len(f_ones) if f_ones else 0.0,
    )


if __name__ == "__main__":
    main()