# Source: ChauHPham's Hugging Face upload (commit 25faba3, verified)
import argparse
from sklearn.model_selection import train_test_split
from .config import load_config
from .datasets import DatasetLoader
from .models import DetectorModel
from .train import build_trainer
from .evaluate import evaluate
def train_command(args):
    """Train a detector model on a dataset and save it to ``cfg.save_dir``.

    Args:
        args: argparse.Namespace with ``data`` (dataset CSV/JSON/JSONL path)
            and ``config`` (YAML config path) attributes.
    """
    cfg = load_config(args.config)
    loader = DatasetLoader(model_name=cfg.base_model, max_length=cfg.max_length)
    df = loader.load(args.data)
    # Validation fraction is configurable via an optional ``val_size`` config
    # field; the default preserves the previous hard-coded 80/20 split.
    # Stratify so both splits keep the original label balance.
    val_size = getattr(cfg, "val_size", 0.2)
    train_df, val_df = train_test_split(
        df,
        test_size=val_size,
        random_state=cfg.seed,
        stratify=df["label"],
    )
    model = DetectorModel(model_name=cfg.base_model)
    trainer = build_trainer(model.model, model.tokenizer, train_df, val_df, cfg)
    trainer.train()
    model.save(cfg.save_dir)
    print(f"✅ Training complete. Model saved to: {cfg.save_dir}")
def eval_command(args):
    """Load a saved detector model and report metrics on a dataset.

    Args:
        args: argparse.Namespace with ``model_path``, ``data`` and
            ``config`` attributes.
    """
    cfg = load_config(args.config)
    # Restore the model first so the dataset loader can tokenize with the
    # same base model the detector was trained on.
    detector = DetectorModel.load(args.model_path)
    dataset = DatasetLoader(
        model_name=detector.model_name, max_length=cfg.max_length
    ).load(args.data)
    evaluate(detector.model, detector.tokenizer, dataset, max_length=cfg.max_length)
def main():
    """Entry point: parse command-line arguments and dispatch to a subcommand."""
    parser = argparse.ArgumentParser(
        prog="ai-detector",
        description="Detect whether text is AI- or human-written.",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    # "train": fit a new detector model from a dataset.
    train_parser = commands.add_parser("train", help="Train a new detector model.")
    train_parser.add_argument(
        "--data", required=True, help="Path to dataset CSV/JSON/JSONL."
    )
    train_parser.add_argument(
        "--config", default="configs/default.yaml", help="YAML config path."
    )
    train_parser.set_defaults(func=train_command)

    # "eval": score a previously saved model against a dataset.
    eval_parser = commands.add_parser("eval", help="Evaluate a trained model.")
    eval_parser.add_argument(
        "--model-path", required=True, help="Path to saved model dir."
    )
    eval_parser.add_argument(
        "--data", required=True, help="Path to dataset CSV/JSON/JSONL."
    )
    eval_parser.add_argument(
        "--config", default="configs/default.yaml", help="YAML config path."
    )
    eval_parser.set_defaults(func=eval_command)

    parsed = parser.parse_args()
    # Each subparser stored its handler in ``func``; invoke it.
    parsed.func(parsed)
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()