|
|
--- |
|
|
library_name: peft |
|
|
model_name: SmolLM3-3B-Instruct-Anime |
|
|
tags: |
|
|
- base_model:adapter:HuggingFaceTB/SmolLM3-3B-Base |
|
|
- lora |
|
|
- sft |
|
|
- transformers |
|
|
- trl |
|
|
|
|
pipeline_tag: text-generation |
|
|
base_model: HuggingFaceTB/SmolLM3-3B-Base |
|
|
--- |
|
|
|
|
|
# Model Card for SmolLM3-3B-Instruct-Anime |
|
|
|
|
|
This model is a fine-tuned version of [HuggingFaceTB/SmolLM3-3B-Base](https://huggingface.co/HuggingFaceTB/SmolLM3-3B-Base). |
|
|
It was trained on the [zerofata/Instruct-Anime](https://huggingface.co/datasets/zerofata/Instruct-Anime) dataset.
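Each record in the dataset uses the conversational `messages` format that the training script below consumes. An illustrative record (field values invented for this card, not an actual row):

```python
# Illustrative record shape; the values here are invented.
example = {
    "messages": [
        {"role": "user", "content": "Which ship does the crew of Cowboy Bebop fly?"},
        {"role": "assistant", "content": "They travel aboard the Bebop."},
    ],
    "source": "example-subset",  # hypothetical source label
}
```

The training script drops the `source` column after applying the chat template.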
|
|
|
|
|
## Quick start |
|
|
|
|
|
```python |
|
|
import torch |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
|
|
from peft import PeftModel |
|
|
|
|
|
# Local paths to the base model and the LoRA adapter
|
|
base_model_path = "./SmolLM3-3B-Base/" |
|
|
adapter_path = "./SmolLM3-3B-Instruct-Anime/" |
|
|
|
|
|
# Load the base model and the tokenizer in bf16 |
|
|
print("Loading base model and tokenizer...") |
|
|
base_model = AutoModelForCausalLM.from_pretrained( |
|
|
base_model_path, |
|
|
    dtype=torch.bfloat16,
|
|
device_map="auto", |
|
|
) |
|
|
tokenizer = AutoTokenizer.from_pretrained(base_model_path) |
|
|
|
|
|
# Load the LoRA adapter and merge it into the base model |
|
|
print("Loading LoRA adapter and merging...") |
|
|
model = PeftModel.from_pretrained(base_model, adapter_path) |
|
|
model = model.merge_and_unload()  # merge the LoRA weights into the base model
|
|
|
|
|
# Create the text generation pipeline |
|
|
print("Creating pipeline...") |
|
|
generator = pipeline("text-generation", model=model, tokenizer=tokenizer) |
|
|
|
|
|
# Your question |
|
|
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
|
|
|
|
# Format the prompt using the chat template |
|
|
# Load the same template file used by the training script
|
|
with open("chat_template.jinja", "r") as f: |
|
|
chat_template = f.read() |
|
|
tokenizer.chat_template = chat_template |
|
|
|
|
|
prompt = tokenizer.apply_chat_template([{"role": "user", "content": question}], tokenize=False, add_generation_prompt=True) |
|
|
|
|
|
# Generate the output |
|
|
print("Generating response...") |
|
|
output = generator(prompt, max_new_tokens=2048, return_full_text=False) |
|
|
print("--- Model Response ---") |
|
|
print(output[0]["generated_text"]) |
|
|
``` |
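If you plan to reuse the merged weights, you can save them once as a standalone checkpoint and skip both PEFT and the merge step on later loads. A minimal sketch, reusing the `model` and `tokenizer` objects from the snippet above with a hypothetical output directory:

```python
# Save the merged model as a regular checkpoint (hypothetical path).
merged_dir = "./SmolLM3-3B-Instruct-Anime-merged"
model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)
```

The saved checkpoint then loads directly with `AutoModelForCausalLM.from_pretrained(merged_dir)`, no adapter required.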
|
|
|
|
|
## Training procedure |
|
|
|
|
|
This model was trained with supervised fine-tuning (SFT) using a LoRA adapter; the full training script is reproduced below.
|
|
|
|
|
```python |
|
|
import torch |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
from datasets import load_dataset |
|
|
from peft import LoraConfig |
|
|
from trl import SFTTrainer, SFTConfig |
|
|
import trackio |
|
|
|
|
|
# --- Configuration --- |
|
|
model_name = "./SmolLM3-3B-Base/" |
|
|
dataset_path = "./Instruct-Anime/instruct_dataset.jsonl" |
|
|
output_dir = "./SmolLM3-3B-Instruct-Anime" |
|
|
project_name = "smollm3-sft-anime" |
|
|
|
|
|
# --- 1. Initialize Trackio --- |
|
|
trackio.init(project=project_name) |
|
|
|
|
|
# --- 2. Load the model and the tokenizer --- |
|
|
print("Loading the model and the tokenizer...") |
|
|
model = AutoModelForCausalLM.from_pretrained( |
|
|
model_name, |
|
|
device_map="auto", |
|
|
dtype=torch.bfloat16, |
|
|
low_cpu_mem_usage=True, |
|
|
trust_remote_code=True, |
|
|
    attn_implementation="flash_attention_2",  # requires the flash-attn package
|
|
) |
|
|
tokenizer = AutoTokenizer.from_pretrained(model_name) |
|
|
# Add a pad token if it's missing |
|
|
if tokenizer.pad_token is None: |
|
|
tokenizer.pad_token = tokenizer.eos_token |
|
|
model.config.pad_token_id = model.config.eos_token_id |
|
|
|
|
|
# Load and set the chat template from the jinja file |
|
|
with open("chat_template.jinja", "r") as f: |
|
|
chat_template = f.read() |
|
|
tokenizer.chat_template = chat_template |
|
|
print("The chat template has been loaded from chat_template.jinja and set on the tokenizer.") |
|
|
|
|
|
|
|
|
# --- Enable gradient checkpointing --- |
|
|
print("Enabling Gradient Checkpointing...") |
|
|
model.gradient_checkpointing_enable() |
|
|
|
|
|
|
|
|
# --- 3. Load and process the dataset --- |
|
|
print("Loading and processing the dataset...") |
|
|
dataset = load_dataset("json", data_files=dataset_path, split="train") |
|
|
|
|
|
def formatting_prompts_func(example): |
|
|
# This function formats the chat messages into a single string |
|
|
# by applying the model's chat template. |
|
|
text = tokenizer.apply_chat_template(example['messages'], tokenize=False) |
|
|
example['text'] = text |
|
|
return example |
|
|
|
|
|
dataset = dataset.map(formatting_prompts_func, remove_columns=["messages", "source"]) |
|
|
print(f"Dataset loaded and formatted with {len(dataset)} examples.") |
|
|
|
|
|
|
|
|
# --- 4. Configure LoRA ---
print("Configuring LoRA...")
|
|
peft_config = LoraConfig( |
|
|
r=8, |
|
|
lora_alpha=16, |
|
|
lora_dropout=0.1, |
|
|
    target_modules=["q_proj", "v_proj"],  # attention query and value projections
|
|
bias="none", |
|
|
task_type="CAUSAL_LM", |
|
|
) |
|
|
|
|
|
# --- 5. Configure training --- |
|
|
# Balanced learning rate and batch size for a GPU with ~24GB VRAM |
|
|
print("Configuring training arguments...") |
|
|
training_args = SFTConfig( |
|
|
output_dir=output_dir, |
|
|
num_train_epochs=5, # Train for a total of 5 epochs |
|
|
per_device_train_batch_size=2, |
|
|
gradient_accumulation_steps=8, |
|
|
optim="paged_adamw_8bit", |
|
|
learning_rate=1e-4, |
|
|
    lr_scheduler_type="rex",  # non-built-in REX schedule; use e.g. "cosine" if your stack lacks it
|
|
warmup_steps=50, |
|
|
logging_steps=8, |
|
|
save_total_limit=5, # Keep best + last few checkpoints |
|
|
    # NOTE: requires an eval split with eval_strategy matching save_strategy
    load_best_model_at_end=True,
|
|
save_strategy="steps", |
|
|
report_to="trackio", |
|
|
packing=True, |
|
|
max_length=5120, |
|
|
    metric_for_best_model="eval_loss",
    greater_is_better=False,
|
|
) |
|
|
|
|
|
# --- 6. Create and run the trainer --- |
|
|
print("Creating SFTTrainer...") |
|
|
trainer = SFTTrainer( |
|
|
model=model, |
|
|
args=training_args, |
|
|
train_dataset=dataset, |
|
|
peft_config=peft_config, |
|
|
# The trainer will automatically use the 'text' column |
|
|
) |
|
|
|
|
|
print("Starting training...") |
|
|
trainer.train()  # pass resume_from_checkpoint=True to resume an interrupted run
|
|
|
|
|
# --- 7. Save the final adapter --- |
|
|
print("Training has finished. Saving the adapter.") |
|
|
trainer.save_model(output_dir) |
|
|
|
|
|
print(f"The LoRA adapter saved to {output_dir}") |
|
|
trackio.finish() |
|
|
``` |
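To share the trained adapter, it can be pushed to the Hub after `trainer.save_model`. A minimal sketch, assuming you are authenticated and using a hypothetical repo id:

```python
from huggingface_hub import login

login()  # or set the HF_TOKEN environment variable

# Hypothetical repo id; replace with your own namespace.
repo_id = "your-username/SmolLM3-3B-Instruct-Anime"
trainer.model.push_to_hub(repo_id)
tokenizer.push_to_hub(repo_id)
```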
|
|
|
|
|
### Framework versions |
|
|
|
|
|
- PEFT: 0.17.1
|
|
- TRL: 0.23.0 |
|
|
- Transformers: 4.56.2 |
|
|
- PyTorch: 2.8.0+cu126
|
|
- Datasets: 4.1.1 |
|
|
- Tokenizers: 0.22.1 |
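To confirm that a local environment matches these versions, a quick sanity check (assuming all of the packages above are installed):

```python
# Print installed versions to compare against the list above.
import datasets, peft, tokenizers, torch, transformers, trl

for name, module in [
    ("PEFT", peft),
    ("TRL", trl),
    ("Transformers", transformers),
    ("PyTorch", torch),
    ("Datasets", datasets),
    ("Tokenizers", tokenizers),
]:
    print(f"{name}: {module.__version__}")
```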
|
|
|
|
|
## Citations |
|
|
|
|
|
Cite TRL as: |
|
|
|
|
|
```bibtex |
|
|
@misc{vonwerra2022trl, |
|
|
title = {{TRL: Transformer Reinforcement Learning}}, |
|
|
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, |
|
|
year = 2020, |
|
|
journal = {GitHub repository}, |
|
|
publisher = {GitHub}, |
|
|
howpublished = {\url{https://github.com/huggingface/trl}} |
|
|
} |
|
|
``` |
|
|
|