SophieA17
/

Sophie0-Reasoning-GRPO

Model card · Files and versions

SophieA17 committed on Jun 4

Commit

f225fc7

·

verified ·

1 Parent(s): edd20a2

Update README.md

Files changed (1) hide show

README.md +13 -3

README.md CHANGED Viewed

@@ -1,4 +1,14 @@
-Sophie0-SFT
 ### Introduction
@@ -18,8 +28,8 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
-model: AutoModelForCausalLM = AutoModelForCausalLM.from_pretrained("SophieA17/Sophie0-SFT", trust_remote_code=True)
-tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained("SophieA17/Sophie0-SFT", trust_remote_code=True)
 model = model.to(device="cuda:0", dtype=torch.bfloat16)
 inputs = [

+---
+license: apache-2.0
+datasets:
+- K-and-K/knights-and-knaves
+language:
+- en
+- zh
+base_model:
+- SophieA17/Sophie0-Reasoning-SFT
+---
+Sophie0-Reasoning-GRPO
 ### Introduction
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+model: AutoModelForCausalLM = AutoModelForCausalLM.from_pretrained("SophieA17/Sophie0-Reasoning-GRPO", trust_remote_code=True)
+tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained("SophieA17/Sophie0-Reasoning-GRPO", trust_remote_code=True)
 model = model.to(device="cuda:0", dtype=torch.bfloat16)
 inputs = [