|
|
|
|
|
|
|
|
""" |
|
|
Evaluation back-end for the "interactive-graph" interface on Hugging Face Spaces
-------------------------------------------------------------------------------
* Serves all evaluation UIs (`/eval_interfaces/<option>`).
* Transparently patches every explanation HTML so you never touch the originals:
    - layout / badge / telemetry tweaks (unchanged)
    - **NEW** helper that counts steps and answers the parent's
      `xai-get-step-count`, so the prompt shows "1 -> N".
* **DEBUG MODE** (toggle with `ICOT_DEBUG=1`) prints:
    - every file request + whether it was patched
    - console-side step counts inside each iframe.
* Persists results to CSV / JSON and can push them to the dataset repo
  named by `HF_REPO` (`Miles1999/interactive-COT-data`) if `ICOT_TOKEN` is set.
|
|
""" |
|
|
|
|
|
import os, csv, json, uuid, logging |
|
|
from datetime import datetime |
|
|
from pathlib import Path |
|
|
from flask import ( |
|
|
Flask, abort, Response, send_file, request, |
|
|
url_for, render_template_string, jsonify |
|
|
) |
|
|
from huggingface_hub import HfApi, login |
|
|
import re |
|
|
from datasets import load_dataset |
|
|
import random |
|
|
|
|
|
|
|
|
# Upper bound of the wrapping per-interface counters: increment_submit_counter
# cycles through 1..MAX_USERS and then starts again at 1.
MAX_USERS = 50

# One plain-text file per interface; each holds a single integer counter.
# The paths are relative, so they resolve against the current working directory.
COT_COUNTER_FILE = "regular_cot_counter.txt"
GRAPH_COUNTER_FILE = "graph_counter.txt"
CODE_COUNTER_FILE = "code_counter.txt"
NATURAL_LANG_COUNTER_FILE = "natural_lang_counter.txt"

# Module-level default interface card.
SELECTED_CARD = "graph"
|
|
|
|
|
def get_submit_counter(file_path: str) -> int:
    """Return the integer stored in the counter file at *file_path*.

    A missing file, an empty file, or non-numeric content all yield ``0``,
    so a fresh deployment starts counting from zero instead of crashing.

    Args:
        file_path: Path of the text file holding a single integer.

    Returns:
        The stored counter value, or ``0`` when it cannot be read or parsed.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return int(f.read().strip())
    except (FileNotFoundError, ValueError):
        # ValueError covers empty/garbage content (and undecodable bytes).
        return 0
|
|
|
|
|
def increment_submit_counter(file_path: str) -> int:
    """Advance the wrapping counter stored in *file_path* and return it.

    The counter cycles through ``1 .. MAX_USERS``: the value after
    ``MAX_USERS`` is ``1`` (never ``0``). Unparsable or empty content is
    treated as ``0``, so the first returned value is ``1``.

    The file is created when it does not exist yet, and the current value
    is read from the handle that is already open rather than through a
    second ``open()`` of the same file.

    Note: the read-modify-write is not locked, so concurrent requests can
    still observe the same value.

    Args:
        file_path: Path of the text file holding the counter.

    Returns:
        The new counter value that was written back to the file.
    """
    Path(file_path).touch(exist_ok=True)
    with open(file_path, "r+", encoding="utf-8") as f:
        try:
            current = int(f.read().strip())
        except ValueError:
            current = 0
        # A modulo result of 0 is skipped so the cycle is 1..MAX_USERS.
        new_value = (current + 1) % (MAX_USERS + 1) or 1
        f.seek(0)
        f.write(str(new_value))
        f.truncate()  # drop leftover digits when the new value is shorter
    return new_value
|
|
|
|
|
|
|
|
def increment_submit_counter_absolute(file_path: str) -> int:
    """Add one to the (non-wrapping) counter in *file_path* and return it.

    Unlike ``increment_submit_counter`` the value grows without bound.
    Unparsable or empty content is treated as ``0``. The file is created
    when missing, and the value is read from the already-open handle
    instead of re-opening the file.

    Note: the read-modify-write is not locked against concurrent requests.

    Args:
        file_path: Path of the text file holding the counter.

    Returns:
        The new counter value that was written back to the file.
    """
    Path(file_path).touch(exist_ok=True)
    with open(file_path, "r+", encoding="utf-8") as f:
        try:
            current = int(f.read().strip())
        except ValueError:
            current = 0
        new_value = current + 1
        f.seek(0)
        f.write(str(new_value))
        f.truncate()  # drop leftover characters from the previous content
    return new_value
|
|
|
|
|
def get_the_min_interface() -> str:
    """Return one interface key drawn uniformly at random.

    Despite the name, no minimum is computed here: the result is simply
    ``random.choice`` over ``"code"``, ``"graph"``, ``"inl"`` and ``"cot"``.
    """
    return random.choice(["code", "graph", "inl", "cot"])
|
|
|
|
|
|
|
|
def get_interface_format(sample_list):
    """Extract the interface name from the first sample's ``file`` path.

    The name is the path component that directly follows
    ``"eval_interfaces/"`` in ``sample_list[0]["file"]``.

    Args:
        sample_list: Sequence of dict-like samples; only the first is read.

    Returns:
        The interface name, or ``None`` when the list is empty, the first
        sample has no usable ``file`` value, or the marker is absent.
    """
    if not sample_list:
        return None
    # `or ""` also covers an explicit {"file": None}.
    file_path = sample_list[0].get("file") or ""
    parts = file_path.split("eval_interfaces/")
    if len(parts) > 1:
        return parts[1].split("/")[0]
    return None
|
|
|
|
|
|
|
|
# Verbose diagnostics are on for any ICOT_DEBUG value other than the literal
# string "0" (the default when the variable is unset).
DEBUG_MODE = os.getenv("ICOT_DEBUG", "0") != "0"

logging.basicConfig(
    level=logging.DEBUG if DEBUG_MODE else logging.INFO,
    format="%(asctime)s | %(levelname)-8s | %(message)s",
)
log = logging.getLogger(__name__)
log.info("Debug mode: %s", DEBUG_MODE)

# Hugging Face credentials: without a token nothing is uploaded and session
# files stay on local disk.
HF_TOKEN = os.getenv("ICOT_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
else:
    log.warning("ICOT_TOKEN not set - results will stay local")
|
|
|
|
|
# Target dataset repository and the folder inside it that receives the
# per-session JSON files (see push_to_hf).
HF_REPO = "Miles1999/interactive-COT-data"
HF_FOLDER = "session_logs"

# Directory the evaluation pages are read from (see load_outer).
CODEBASE_DIR = "."

# URL option -> HTML file of the corresponding evaluation interface.
EVAL_PAGES = {
    "cot": "evaluation/eval_interfaces/reg_cot_eval_interface.html",
    "interactive_nl": "evaluation/eval_interfaces/nl_eval_interface.html",
    "interactive_code": "evaluation/eval_interfaces/coding_eval_interface.html",
    "interactive_graph": "evaluation/eval_interfaces/graph_eval_interface.html",
}

# Top-level directories the /browse endpoint accepts.
ALLOWED_ROOTS = ["html_explanations", "evaluation"]

# Aggregate statistics: one CSV row per submission, columns in this order.
CSV_FILENAME = "evaluation_stats.csv"
CSV_PATH = Path(CSV_FILENAME).resolve()
CSV_HEADER = [
    "timestamp",
    "session_id",
    "user_name",
    "overallAccuracy(%)",
    "correctItemAccuracy(%)",
    "incorrectItemAccuracy(%)",
    "avgTimeCorrect",
    "avgTimeIncorrect",
]

# Staging area for session JSON files; created at import time.
SESSION_DIR = Path("/tmp/sessions")
SESSION_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def gen_session_id() -> str:
    """Return a fresh random (version 4) UUID in its canonical string form."""
    session_uuid = uuid.uuid4()
    return str(session_uuid)
|
|
|
|
|
def save_session_local(sid: str, data: dict) -> Path:
    """Write *data* as pretty-printed JSON to ``SESSION_DIR/<sid>.json``.

    *sid* may come straight from the client (``save_stats`` reads it from
    the request body), so every character outside ``[A-Za-z0-9_.-]`` is
    replaced by ``_`` before it is used as a file name. This keeps path
    separators out of the name, so the file cannot land outside
    ``SESSION_DIR``. Ordinary UUID session ids are unaffected.

    Args:
        sid: Session identifier used as the file stem.
        data: JSON-serialisable session payload.

    Returns:
        Path of the file that was written.
    """
    safe_sid = re.sub(r"[^A-Za-z0-9_.-]", "_", str(sid))
    path = SESSION_DIR / f"{safe_sid}.json"
    path.write_text(json.dumps(data, indent=2), encoding="utf-8")
    log.info("Stored session JSON -> %s", path)
    return path
|
|
|
|
|
def push_to_hf(local_path: Path, sid: str):
    """Upload a session file to the HF dataset repo, then delete it locally.

    The file is stored as ``HF_FOLDER/<file name>`` in ``HF_REPO``. The
    operation is best-effort: failures are logged and never raised, so a
    broken upload cannot fail the request that triggered it. Upload errors
    and clean-up errors are reported separately, and the local copy is kept
    whenever the upload did not succeed.

    Args:
        local_path: Session JSON file on local disk.
        sid: Session id, used only in log messages.
    """
    try:
        HfApi().upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=f"{HF_FOLDER}/{local_path.name}",
            repo_id=HF_REPO,
            repo_type="dataset",
        )
    except Exception as e:  # deliberate catch-all: upload is best-effort
        log.warning("HF upload failed for %s : %s", sid, e)
        return

    try:
        local_path.unlink()
    except OSError as e:
        log.warning(
            "Uploaded session %s to HF but could not remove %s : %s",
            sid, local_path, e,
        )
        return
    log.info("Uploaded session %s to HF & removed local copy", sid)
|
|
|
|
|
|
|
|
# CSS injected into every patched explanation page (see preprocess_html).
# The payload is sent to the browser verbatim, so it must contain nothing but
# valid CSS inside the <style> element.
INJECT_STYLE = """
<style>
  /* layout tweak: 40 / 60 split */
  .left-panel { width:40%!important }
  .right-panel{ width:60%!important }

  /* hide "Variables" pane */
  .variables-container{display:none!important}
  .explanation-container{flex:1!important;height:auto!important}
  /* numbered badge next to each step */
  .step{display:none;align-items:center;gap:12px}
  .step.shown{display:flex}
  .step-content,.step>*:first-child{flex:1}
  .badge{
    flex-shrink:0;color:#adadad;font-size:1.7rem;
    font-weight:700;padding:4px 14px;border-radius:16px;pointer-events:none;
  }

  /* -- eliminate blank space in Problem Statement -- */
  .problem-statement{
    /* let the section size itself */
    height:auto!important;          /* overrides 50% */
    flex:0 0 auto!important;        /* occupy only the space it needs */
    padding:20px!important;         /* keep your original insets */
    overflow-y:visible!important;   /* scroll not usually needed here */
  }

  .problem-understanding{
    /* fill everything that's left */
    flex:1 1 auto!important;        /* grow/shrink with column */
    height:auto!important;          /* overrides 50% */
    overflow-y:auto!important;      /* still scroll if content is long */
  }

  /* tidy up internal spacing */
  .problem-statement p{
    margin:0!important;
    line-height:1.4!important;
  }
  .section-title{
    margin-bottom:8px!important;
    padding-bottom:3px!important;
  }

  /* -- eliminate per-line margins in the Summary panel -- */
  .problem-understanding .variable-item,
  .problem-understanding p,
  .problem-understanding li {
    margin: 0 !important;           /* no extra vertical gap */
    padding: 0 !important;          /* align text to the left edge */
  }

  /* -- make every control button solid black -- */
  .btn,
  .btn-play-pause,
  .btn-stop,
  .btn-prev,
  .btn-next,
  .btn-correct,
  .btn-wrong{
    background: #000 !important;    /* black fill */
    color: #fff !important;         /* white text so it stays readable */
    border: none !important;        /* ensure no colored borders peek through */
  }

  .btn:hover {                      /* keep hover subtle */
    opacity: 0.9 !important;
  }

  /* if you dim disabled buttons, keep them grey */
  .btn.disabled {
    background: #6c6c6c !important;
    color: #fff !important;
  }

  /* Natural-language & graph explanations (.step.active) */
  .step.active {                    /* remove yellow fill */
    background: transparent !important;
    border: 2px solid #ffd700 !important;  /* gold border */
    padding-left: 10px !important;  /* keep text indented */
  }
  .code-line.current {
    background: transparent !important;
    border: 2px solid #ffd700 !important;
    padding-left: 8px !important;
  }
  .step.active::before { display: none !important; }  /* hide blinking dot */

  /* disable clicking */
  .step{
    pointer-events:none !important; /* clicks, hovers, etc. are ignored */
    cursor: default !important;     /* arrow cursor instead of pointer */
  }
  /* Do not bold the formula */
  .formula {
    font-weight: normal;
    margin: 5px 0;
  }
</style>
"""
|
|
|
|
|
|
|
|
# JavaScript injected into every patched explanation page (see
# preprocess_html): renames headings, strips duplicated formulas, wraps and
# badges the steps, reveals steps cumulatively, reports button clicks to the
# parent window and renumbers code lines.
# The backslash in the JS regex is doubled so that Python emits a literal
# `\-` without relying on an invalid string escape.
INJECT_SCRIPT_BASE = """
<script>

document.addEventListener('DOMContentLoaded', ()=>{

  /* 1. optional: hide the download button in outer pages */
  const dl = document.getElementById('download-btn');
  if (dl) dl.remove();            // safe even if not present

  /* 2. rename headings */
  const h = document.querySelector('.problem-understanding .section-title');
  if (h) h.textContent = 'Summary';
  const q = document.querySelector('.problem-statement .section-title');
  if (q) q.textContent = 'Question';

  /* 3. "Goal" wording */
  // Case 1: <div class="what-to-find"><h4>...</h4>
  const goalH4 = document.querySelector('.what-to-find h4');
  if (goalH4) {
    goalH4.textContent = 'Goal:';
  }

  // Case 2: standalone <h3>What we need to find
  const goalH3 = document.querySelector('.problem-understanding h3');
  if (goalH3 && /what we need to find/i.test(goalH3.textContent)) {
    goalH3.textContent = 'Goal:';
  }

  /* 5. remove the formula if there is one */
  const stepContents = document.querySelectorAll('.step-content');
  stepContents.forEach(step => {
    let currentNode = step.firstChild;
    const formulaDiv = step.querySelector('.formula');
    while (currentNode && currentNode !== formulaDiv) {
      if (/[+\\-*/=]/.test(currentNode.textContent)){
        if(formulaDiv){
          let next = formulaDiv.nextSibling;
          while (next) {
            const toRemove = next;
            next = next.nextSibling;
            step.removeChild(toRemove);
            formulaDiv.textContent = "";
          }
        }
        break;
      }
      currentNode = currentNode.nextSibling;
    }
  });

  /* 4. decorate & reveal steps */
  const steps = [...document.querySelectorAll('.step')];
  steps.forEach((s, i) => {
    const wrap = document.createElement('div');
    wrap.className = 'step-content';
    while (s.firstChild) wrap.appendChild(s.firstChild);
    s.appendChild(wrap);

    // Add badge for all steps except the last one
    if (i < steps.length - 1) {
      const badge = document.createElement('span');
      badge.className = 'badge';
      badge.textContent = '' + (i + 1);
      s.appendChild(badge);
    }
  });
  if (steps.length) {
    steps[0].classList.add('active', 'shown');
    window.currentStepIndex = 0;
  }

  /* cumulative reveal on highlight change */
  const sync = ()=>{
    const idx=steps.findIndex(el=>el.classList.contains('active'));
    steps.forEach((el,i)=>el.classList.toggle('shown',i<=idx));
  };
  sync();
  const obs=new MutationObserver(sync);
  steps.forEach(el=>obs.observe(el,{attributes:true,attributeFilter:['class']}));

  /* click telemetry */
  const post = key => window.parent?.postMessage({type:'xai-click',key},'*');
  const map = {playPauseBtn:'play',stopBtn:'stop',nextBtn:'next',prevBtn:'prev'};
  Object.entries(map).forEach(([id,key])=>{
    const btn=document.getElementById(id);
    if (btn) btn.addEventListener('click',()=>post(key),{capture:true});
  });

  /* Number only executable lines (i.e. lines *without* the .comment span). */
  (function renumberCodeLines(){
    const codeLines = document.querySelectorAll('.code-line');
    let n = 0;
    codeLines.forEach(cl=>{
      const numSpan = cl.querySelector('.line-number');
      if(!numSpan) return;                    // safety check
      if(cl.querySelector('.comment')){       // comment line
        numSpan.textContent = '';             // blank -> hides via :empty
      }else{                                  // real code
        numSpan.textContent = ++n;
      }
    });
  })();

});

</script>
"""
|
|
|
|
|
|
|
|
# JavaScript injected into every patched explanation page (see
# preprocess_html): determines the number of steps and posts it to the parent
# window, once on load and again whenever the parent sends
# `xai-get-step-count`.
INJECT_STEPCOUNT = """
<script>
/* Counts steps and answers parent page's xai-get-step-count query */
(function(){
  function countSteps(){
    /* Case 1: explicit JSON (typeof null is 'object', hence the null check) */
    if(typeof problemData==='object' && problemData!==null){
      if(problemData.totalSteps) return problemData.totalSteps;
      if(Array.isArray(problemData.steps))return problemData.steps.length;
    }
    /* Case 2: count DOM markers */
    const sel=['.step-item','.step','.badge','[data-step]'];
    for(const s of sel){
      const n=document.querySelectorAll(s).length;
      if(n) return n;
    }
    return 1;
  }
  function send(){
    const n = countSteps();
    /* >>> DEBUG <<< */
    console.debug('[step-count]', n, window.location.pathname);
    window.parent?.postMessage({type:'xai-step-count', count:n}, '*');
  }
  /* answer on request */
  window.addEventListener('message',ev=>{
    if(ev?.data?.type==='xai-get-step-count') send();
  });
  /* ...and volunteer once */
  if(document.readyState==='loading'){
    document.addEventListener('DOMContentLoaded',send);
  }else{send();}
})();
</script>
"""
|
|
|
|
|
|
|
|
# JavaScript that adds a badge to every `.step-item` inside the `#step-iframe`
# of a graph explanation. NOTE(review): this constant is not part of the
# injection list built in preprocess_html - confirm whether that is intended.
INJECT_SCRIPT_GRAPH_BADGE = """
<script>
document.addEventListener('DOMContentLoaded',()=>{
  const frame = document.getElementById('step-iframe');
  if(!frame) return;                          /* not a graph explanation */
  /* inline style keeps badge self-contained */
  const styleBadge = [
    'flex-shrink:0',
    'color:#adadad',
    'font-size:1.7rem',
    'font-weight:700',
    'padding:2px 10px',
    'border-radius:14px',
    'pointer-events:none'
  ].join(';');
  function decorate(){
    try{
      const doc = frame.contentDocument;
      if(!doc) return;
      [...doc.querySelectorAll('.step-item')].forEach(el=>{
        if(el.querySelector('.badge')) return;      /* already done */
        const step_id = el.id;
        /* title text (trim & lowercase) */
        const title = (el.textContent || '').trim().toLowerCase();
        if(title.startsWith('final answer')) return; /* skip badge */
        /* flex container so title & badge share a line */
        el.style.display = 'flex';
        el.style.alignItems = 'center';
        el.style.gap = '12px';
        if(el.firstElementChild) el.firstElementChild.style.flex = '1';
        const b = doc.createElement('span');
        b.className = 'badge';
        b.style.cssText = styleBadge;
        b.textContent = '' + step_id;               /* badge shows the element id */
        el.appendChild(b);
      });
    }catch(e){ /* best-effort: frame document may be inaccessible */ }
  }
  decorate();                                 /* decorate current content */
  frame.addEventListener('load', decorate);   /* ...and on every reload */
});
</script>
"""
|
|
|
|
|
# JavaScript injected into every patched explanation page (see
# preprocess_html): replaces scrollIntoView on each `.step` element with a
# no-op function.
DISABLE_SCROLL_SCRIPT = """
<script>
document.addEventListener('DOMContentLoaded', ()=>{
  // Override scrollIntoView for all steps
  const steps = document.querySelectorAll('.step');
  steps.forEach(s => {
    s.scrollIntoView = function(){};
  });
});
</script>
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def preprocess_html(path: str) -> str:
    """Return the HTML at *path* with the style and helper scripts injected.

    Two changes are made to the file content (the file itself is untouched):

    1. Every ``const problemData`` declaration becomes an assignment to
       ``window.problemData`` - presumably so the injected step counter can
       read it regardless of where the page declared it.
    2. ``INJECT_STYLE``, ``INJECT_STEPCOUNT``, ``INJECT_SCRIPT_BASE`` and
       ``DISABLE_SCROLL_SCRIPT`` are inserted right before the first
       ``</head>`` tag (matched case-insensitively). When the document has
       no ``</head>``, they are prepended to it instead.

    Args:
        path: Location of the UTF-8 encoded HTML file.

    Returns:
        The patched document as a string.
    """
    html = Path(path).read_text(encoding="utf-8")
    html = re.sub(r'\bconst\s+problemData\b', 'window.problemData', html)

    inj = (
        INJECT_STYLE
        + INJECT_STEPCOUNT
        + INJECT_SCRIPT_BASE
        + DISABLE_SCROLL_SCRIPT
    )

    # A callable replacement is used because `inj` contains backslashes that
    # re.sub would otherwise interpret as escape sequences.
    res, hits = re.subn(
        r"</head\s*>",
        lambda m: inj + m.group(0),
        html,
        count=1,
        flags=re.IGNORECASE,
    )
    if not hits:
        res = inj + html

    if DEBUG_MODE:
        log.debug("Injected helpers into %s (%d -> %d chars)", path, len(html), len(res))
    return res
|
|
|
|
|
|
|
|
def needs_transform(path: str) -> bool:
    """Tell whether *path* is an explanation page that must be patched.

    The check is case-insensitive and treats backslashes as forward
    slashes. A path qualifies when it ends in ``.html`` and contains one of
    the interactive explanation directories as a full path component.
    """
    normalized = path.replace("\\", "/").lower()
    if not normalized.endswith(".html"):
        return False
    explanation_dirs = (
        "/interactive_nat_lang_explanations/",
        "/interactive_graph_explanations/",
        "/interactive_coding_explanations/",
    )
    return any(marker in normalized for marker in explanation_dirs)
|
|
|
|
|
|
|
|
# The WSGI application object; every route below is registered on it.
app = Flask(import_name=__name__)
|
|
|
|
|
|
|
|
|
|
|
# Landing page: four cards, each linking to one evaluation interface.
# The string is rendered by landing() and sent to the browser, so it must
# contain nothing but the page markup.
SELECT_TEMPLATE = """
<!DOCTYPE html><html lang='en'><head><meta charset='utf-8'>
<title>Select Evaluation Mode</title>
<link href='https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap' rel='stylesheet'>
<style>
  *,*:before,*:after{box-sizing:border-box}
  body{margin:0;min-height:100vh;display:flex;flex-direction:column;justify-content:center;align-items:center;
       font-family:'Inter',sans-serif;background:#f1f5f9}
  h1{color:#111827;font-size:2.4rem;font-weight:700;margin-bottom:2rem;text-align:center}
  .grid{width:90%;max-width:650px;display:grid;gap:80px;grid-template-columns:repeat(auto-fit,minmax(240px,1fr))}
  .card{width:350px; height:150px;padding:10px;border-radius:16px;color:#fff;font-weight:600;font-size:1.4rem;
        display:flex;flex-direction:column;justify-content:center;align-items:center;text-align:center;
        text-decoration:none;transition:.25s;box-shadow:0 6px 18px rgba(0,0,0,.08)}
  .card:hover{transform:translateY(-6px);box-shadow:0 10px 24px rgba(0,0,0,.12)}
  .cot{background:#ef4444}.inl{background:#f97316}.code{background:#10b981}.graph{background:#3b82f6}
</style></head><body>
  <h1>Choose an Evaluation Interface</h1>
  <div class='grid'>
    <a class='card cot'   href='/eval_interfaces/cot'>Chain of Thought</a>
    <a class='card inl'   href='/eval_interfaces/interactive_nl'>Interactive Chain of Thought</a>
    <a class='card code'  href='/eval_interfaces/interactive_code'>Interactive Program of Thought</a>
    <a class='card graph' href='/eval_interfaces/interactive_graph'>Interactive Graph</a>
  </div>
</body></html>
"""
|
|
|
|
|
@app.route("/")
def landing():
    """Render the interface-selection page.

    A card key obtained from ``get_the_min_interface`` is passed to the
    template as ``selected_card``. The key lives in a local variable only;
    the module-level ``SELECTED_CARD`` is not modified by this view.
    """
    log.info("landing page update")
    chosen_card = get_the_min_interface()
    return render_template_string(SELECT_TEMPLATE, selected_card=chosen_card)
|
|
|
|
|
|
|
|
# Evaluation option -> (counter file, label used in the log line).
_COUNTER_BY_OPTION = {
    "cot": (COT_COUNTER_FILE, "cot"),
    "interactive_graph": (GRAPH_COUNTER_FILE, "graph"),
    "interactive_code": (CODE_COUNTER_FILE, "code"),
    "interactive_nl": (NATURAL_LANG_COUNTER_FILE, "natural language"),
}


@app.route("/eval_interfaces/<option>")
def load_outer(option):
    """Serve the evaluation page for *option* with a user counter injected.

    The page is looked up in ``EVAL_PAGES`` (unknown options give a 404).
    The interface's counter is advanced and exposed to the page as the
    JavaScript constant ``USER_COUNTER`` through a ``<script>`` placed
    before the first ``</head>``. Only the first occurrence is patched so
    the constant can never be declared twice.

    The resulting HTML is rendered with ``render_template_string``, i.e. it
    is processed as a Jinja template.
    """
    rel = EVAL_PAGES.get(option)
    if not rel:
        abort(404)

    html = (Path(CODEBASE_DIR) / rel).read_text(encoding="utf-8")

    counter_spec = _COUNTER_BY_OPTION.get(option)
    if counter_spec is not None:
        counter_file, label = counter_spec
        counter = increment_submit_counter(counter_file)
        log.info("%s counter value %d", label, counter)
        injected = f"<script>const USER_COUNTER = {counter};</script>\n"
        html = html.replace("</head>", injected + "</head>", 1)

    return render_template_string(html)
|
|
|
|
|
|
|
|
|
|
|
@app.route("/interactive-llm-xai/<path:sub>")
@app.route("/eval_interfaces/interactive-llm-xai/<path:sub>")
def serve_explanation(sub):
    """Serve a file below the application directory, patching it if needed.

    *sub* comes from the URL and is therefore untrusted: it is resolved
    against the current working directory and rejected with a 404 when the
    result lies outside of it (``..`` segments, absolute paths, symlinks
    pointing elsewhere).

    Directories are delegated to ``browse``. HTML pages for which
    ``needs_transform`` is true are returned through ``preprocess_html``.
    Everything else is sent unchanged.
    """
    base = Path.cwd().resolve()
    full = (base / sub).resolve()
    try:
        full.relative_to(base)
    except ValueError:
        log.warning("serve_explanation | rejected path outside app root: %s", sub)
        abort(404)

    needs = needs_transform(str(full))
    log.info("serve_explanation | %s | needs_transform=%s", full, needs)

    if not full.exists():
        abort(404)
    if full.is_dir():
        return browse(sub)
    if needs:
        return Response(preprocess_html(str(full)), mimetype="text/html")
    return send_file(full)
|
|
|
|
|
|
|
|
# Jinja template of the file browser (see browse): optional parent link,
# folder list, HTML file list and an optional content box. `html_content` is
# emitted with `|safe`, so callers must pass already-escaped markup.
BROWSER_TEMPLATE = """
<!DOCTYPE html><html><head><meta charset='utf-8'><title>Browse</title>
<style>
  html,body{font-family:Arial;margin:20px;height: 100vh !important;}
  ul{list-style:none;padding:0} li{margin:4px 0}
  a{text-decoration:none;color:#2563eb} a:hover{text-decoration:underline}
  .content{margin-top:15px;border:1px solid #e5e7eb;padding:10px;border-radius:8px;background:#f9fafb}
</style></head><body>
{% if parent_link %}<p><a href='{{ parent_link }}'>[Parent]</a></p>{% endif %}
{% if directories %}<h2>Folders</h2><ul>{% for d in directories %}
  <li><a href='{{ url_for('browse', req_path=d.link) }}'>{{ d.name }}</a></li>{% endfor %}</ul>{% endif %}
{% if files %}<h2>HTML Files</h2><ul>{% for f in files %}
  <li><a href='{{ url_for('browse', req_path=f.link) }}'>{{ f.name }}</a></li>{% endfor %}</ul>{% endif %}
{% if html_content %}<div class='content'>{{ html_content|safe }}</div>{% endif %}
</body></html>
"""
|
|
|
|
|
@app.route("/browse/", defaults={"req_path": ""})
@app.route("/browse/<path:req_path>")
def browse(req_path):
    """Minimal file browser restricted to the ``ALLOWED_ROOTS`` directories.

    *req_path* is untrusted. It is resolved against the current working
    directory first and validated afterwards, so ``..`` segments, absolute
    paths and symlinks cannot escape the allowed roots. An empty path lists
    the application directory itself.

    * Directory: lists sub-directories and ``.html`` files (dot-entries
      are hidden).
    * ``.html`` file: sent as-is.
    * Any other file: shown as HTML-escaped text inside ``<pre>``.

    All links are built relative to the application directory, which is
    the form this endpoint accepts.
    """
    from html import escape  # stdlib; imported locally to keep the block self-contained

    base = Path.cwd().resolve()
    full = (base / req_path).resolve()
    try:
        rel_parts = full.relative_to(base).parts
    except ValueError:
        abort(404)
    if rel_parts and rel_parts[0] not in ALLOWED_ROOTS:
        abort(404)
    if not full.exists():
        abort(404)

    def link_for(target: Path) -> str:
        """Return *target* as a URL path relative to the app directory."""
        rel = target.relative_to(base).as_posix()
        return "" if rel == "." else rel

    if full.is_dir():
        dirs, files = [], []
        for entry in sorted(full.iterdir()):
            if entry.name.startswith("."):
                continue
            item = {"name": entry.name, "link": link_for(entry)}
            if entry.is_dir():
                dirs.append(item)
            elif entry.suffix.lower() == ".html":
                files.append(item)
        if full == base:
            parent = url_for("landing")
        else:
            parent = url_for("browse", req_path=link_for(full.parent))
        return render_template_string(
            BROWSER_TEMPLATE,
            parent_link=parent,
            directories=dirs,
            files=files,
            html_content=None,
        )

    if full.suffix.lower() == ".html":
        return send_file(full)

    parent = url_for("browse", req_path=link_for(full.parent))
    txt = full.read_text(encoding="utf-8", errors="replace")
    # The template emits html_content with |safe, so escape the file text here.
    return render_template_string(
        BROWSER_TEMPLATE,
        parent_link=parent,
        html_content=f"<pre>{escape(txt)}</pre>",
    )
|
|
|
|
|
|
|
|
@app.route("/save-stats", methods=["POST"])
def save_stats():
    """Persist one evaluation result posted by the client.

    The JSON body is read leniently (any content type, malformed input
    tolerated). A body that is not a JSON object is treated as empty. The
    result is stored in three places:

    1. one summary row appended to ``CSV_PATH`` (header written on first use),
    2. the full record as JSON via ``save_session_local``,
    3. the HF dataset via ``push_to_hf`` when ``HF_TOKEN`` is set.

    ``sessionId`` is client-controlled and ends up in a file name, so any
    character outside ``[A-Za-z0-9_.-]`` is replaced by ``_``. A missing id
    is replaced by a generated one.

    Returns:
        JSON response ``{"status": "ok"}``.
    """
    from datetime import timezone  # stdlib; local import keeps the block self-contained

    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        data = {}

    raw_sid = data.get("sessionId")
    sid = re.sub(r"[^A-Za-z0-9_.-]", "_", str(raw_sid)) if raw_sid else gen_session_id()

    stats = {k: data.get(k) for k in (
        "overallAccuracy", "correctItemAccuracy", "incorrectItemAccuracy",
        "avgTimeCorrect", "avgTimeIncorrect", "samples", "subjective_feedback")}
    # Naive UTC timestamp, same textual format as the deprecated utcnow().
    stats["timestamp"] = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    stats["session_id"] = sid
    stats["user_name"] = data.get("userName", "anonymous")

    # Column order must match CSV_HEADER.
    row = [
        stats["timestamp"], sid, stats["user_name"],
        data.get("overallAccuracy"), data.get("correctItemAccuracy"),
        data.get("incorrectItemAccuracy"), data.get("avgTimeCorrect"),
        data.get("avgTimeIncorrect"),
    ]
    need_header = not CSV_PATH.exists()
    with CSV_PATH.open("a", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        if need_header:
            w.writerow(CSV_HEADER)
        w.writerow(row)

    path = save_session_local(sid, stats)
    if HF_TOKEN:
        push_to_hf(path, sid)
        log.info("new result pushed to database")

    # Local value only; the module-level SELECTED_CARD is not modified here.
    selected_card = get_the_min_interface()
    log.info("current selected card")
    log.info(selected_card)
    return jsonify({"status": "ok"})
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from PORT (default 7860).
    port = int(os.getenv("PORT", 7860))
    app.run(host="0.0.0.0", port=port, debug=False)