# Evaluate a chat model on multi-label manipulation-technique detection.
#
# Steps, all executed at module level when the script runs:
#   1. Load the labelled examples from ``file_path`` (one JSON object per line).
#   2. If no request file exists yet, write one chat-completion request per
#      example to ``batch_file_path``.
#   3. Hand the request file to the project-local ``replay`` helpers, which
#      return the location of the responses file.
#   4. Parse every response into ``label -> justification`` pairs and compare
#      the predicted labels with the reference labels.
#   5. Print per-label metrics, the macro F1, and the binary
#      "any technique present" metrics.

import json
import re
import sys
import time
from pathlib import Path

import numpy as np
import pandas as pd

from replay import play


def _as_label_list(value: object) -> list:
    """Return *value* when it is a list, otherwise an empty list.

    Used for both the DataFrame column and the raw records so that the label
    inventory and the per-example reference labels are normalised identically.
    """
    return value if isinstance(value, list) else []


def _safe_ratio(numerator: float, denominator: float) -> float:
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def _f1_score(precision: float, recall: float) -> float:
    """Return the harmonic mean of *precision* and *recall* (0.0 if both are 0)."""
    return _safe_ratio(2 * precision * recall, precision + recall)


def _parse_labelled_lines(text: str, known_labels: set) -> dict:
    """Extract ``label -> justification`` pairs from a model reply.

    Each line is split on its first run of whitespace. Lines whose first token
    is a known label are collected (a later line for the same label overwrites
    an earlier one); every other line is echoed to stdout for inspection.
    """
    found = {}
    for line in text.split("\n"):
        parts = line.split(maxsplit=1)
        if len(parts) != 2:
            # Blank line or a single token: no justification to attach.
            print(parts)
            continue
        key, value = parts
        if key in known_labels:
            found[key] = value
        else:
            print(line)
    return found


file_path = "data/train.jsonl"

# NOTE(review): `True or ...` short-circuits, so the argv check is never
# evaluated and the model is always "o3-mini".
if True or sys.argv[1:2] == ["o3-mini"]:
    model_name = "o3-mini"

batch_file_path = f"exp/structured_request_{model_name}.jsonl"
batch_result = Path(batch_file_path).with_suffix('.responses')
reasoning = "explanation"  # not referenced anywhere else in this script

# Load the dataset, skipping blank lines, which would make json.loads fail.
with open(file_path, 'r', encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

df = pd.DataFrame(data)
df['techniques'] = df['techniques'].apply(_as_label_list)
all_labels = sorted({label for techniques in df['techniques'] for label in techniques})
# Labels are escaped so that they are matched literally.
all_labels_re = re.compile('|'.join(map(re.escape, all_labels)))

# 3: a responses file already exists; 1: only the request file exists;
# 0: neither exists yet.
stage = 3 if Path(batch_result).exists() else (1 if Path(batch_file_path).exists() else 0)

if stage < 1:
    # The instruction part of the prompt is the same for every example.
    prompt_prefix = (
        "The following text may use manipulative language. "
        "Identify manipulation techniques present, if any, from this list of labels: "
        f"{', '.join(all_labels)}. "
        "Read the message and flag the presence of each technique. "
        "Use the following format:\n"
        "label1 justification for label1\n"
        "label2 justification for label2\n"
        "Message:\n"
    )
    # UTF-8 is explicit because the JSON is written with ensure_ascii=False.
    with open(batch_file_path, 'w', encoding="utf-8") as f:
        for message in df["content"]:
            request = {
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": model_name,
                    "messages": [
                        {"role": "user", "content": prompt_prefix + message + "\n"},
                    ],
                },
            }
            f.write(json.dumps(request, ensure_ascii=False) + "\n")

# NOTE(review): `stage` never exceeds 3, so this branch always runs.
if stage < 4:
    from replay import play_list, replay_list

    # Both helpers are project-local; presumably play_list executes the
    # requests and replay_list returns the path of the stored responses --
    # TODO confirm against the replay module. Only replay_list's return value
    # is used below.
    play_list(batch_file_path)
    batch_result = replay_list(batch_file_path)

# Per-label confusion counts.
tp = {label: 0 for label in all_labels}
tn = {label: 0 for label in all_labels}
fp = {label: 0 for label in all_labels}
fn = {label: 0 for label in all_labels}
# Binary "is the message manipulative at all" confusion counts.
manip_fp = 0
manip_fn = 0
manip_tp = 0
manip_tn = 0

label_set = set(all_labels)
evaluated = 0
with Path(batch_file_path).open(encoding="utf-8") as request_lines:
    with Path(batch_result).open(encoding="utf-8") as response_lines:
        # The request lines are only iterated in lockstep; their content is
        # not inspected.
        for source, _request, response in zip(data, request_lines, response_lines):
            evaluated += 1
            # A null message content is treated as an empty reply.
            text = json.loads(response)['choices'][0]['message']['content'] or ""
            content = _parse_labelled_lines(text, label_set)
            print(content)

            reference = set(_as_label_list(source.get('techniques')))
            # Every parsed key is a known label with a non-empty justification.
            hypothesis = set(content)

            for label in all_labels:
                expected = label in reference
                predicted = label in hypothesis
                if expected and predicted:
                    tp[label] += 1
                elif expected:
                    fn[label] += 1
                elif predicted:
                    fp[label] += 1
                else:
                    tn[label] += 1

            # manipulative if any of these match
            if reference and hypothesis:
                manip_tp += 1
            elif reference:
                manip_fn += 1
            elif hypothesis:
                manip_fp += 1
            else:
                manip_tn += 1

if evaluated != len(data):
    # zip() stops at the shortest input, so a short request or responses file
    # would otherwise shrink the evaluation silently.
    print(
        f"warning: evaluated {evaluated} of {len(data)} examples; "
        "the request or responses file has fewer lines than the dataset",
        file=sys.stderr,
    )

# Per-label metrics; ratios with an empty denominator are reported as 0.
f1_scores = []
for label in all_labels:
    precision = _safe_ratio(tp[label], tp[label] + fp[label])
    recall = _safe_ratio(tp[label], tp[label] + fn[label])
    accuracy = _safe_ratio(
        tp[label] + tn[label],
        tp[label] + tn[label] + fp[label] + fn[label],
    )
    print(f'{label=} {precision=:0.2f} {recall=:0.2f} {accuracy=:0.2f}')
    f1_scores.append(_f1_score(precision, recall))

f1 = np.mean(f1_scores) if f1_scores else 0.0
print(f'macro {f1=:.2f} over {all_labels}')

# Binary metrics: was any technique flagged vs. was any technique annotated.
precision = _safe_ratio(manip_tp, manip_tp + manip_fp)
recall = _safe_ratio(manip_tp, manip_tp + manip_fn)
accuracy = _safe_ratio(
    manip_tp + manip_tn,
    manip_tp + manip_tn + manip_fp + manip_fn,
)
f1 = _f1_score(precision, recall)
print(f'any {precision=:0.2f} {recall=:0.2f} {accuracy=:0.2f} {f1=:0.2f}')