not-lain committed
Commit be51c09 · verified · 1 Parent(s): 509e623

Training in progress, step 100

README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-base_model: mustafaaljadery/gemma-2B-10M
+base_model: google/gemma-3-1b-it
 library_name: transformers
 model_name: Quotes_Generator
 tags:
@@ -11,7 +11,7 @@ licence: license
 
 # Model Card for Quotes_Generator
 
-This model is a fine-tuned version of [mustafaaljadery/gemma-2B-10M](https://huggingface.co/mustafaaljadery/gemma-2B-10M).
+This model is a fine-tuned version of [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 
 ## Quick start
@@ -27,17 +27,18 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/not-lain/huggingface/runs/gxswt71k)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/not-lain/huggingface/runs/16bwzxjk)
+
 
 This model was trained with SFT.
 
 ### Framework versions
 
-- TRL: 0.12.2
-- Transformers: 4.46.3
-- Pytorch: 2.5.1+cu121
-- Datasets: 3.2.0
-- Tokenizers: 0.20.3
+- TRL: 0.24.0
+- Transformers: 4.57.1
+- Pytorch: 2.8.0+cu126
+- Datasets: 4.0.0
+- Tokenizers: 0.22.1
 
 ## Citations
 
@@ -48,7 +49,7 @@ Cite TRL as:
 ```bibtex
 @misc{vonwerra2022trl,
     title = {{TRL: Transformer Reinforcement Learning}},
-    author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+    author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
     year = 2020,
     journal = {GitHub repository},
     publisher = {GitHub},

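With the base model retargeted to `google/gemma-3-1b-it`, loading this checkpoint follows the usual PEFT pattern. A minimal sketch, assuming the adapter lives in this repo under the id `not-lain/Quotes_Generator` (the repo id and the prompt are assumptions, not part of the diff):

```python
# Sketch: load the new base model and apply the LoRA adapter from this repo.
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained("google/gemma-3-1b-it")
model = PeftModel.from_pretrained(base, "not-lain/Quotes_Generator")  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained("not-lain/Quotes_Generator")

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Give me an inspirational quote."}],
    tokenize=False,
    add_generation_prompt=True,
)
# The chat template already inserts <bos>, so skip special tokens here.
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```
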
adapter_config.json CHANGED
@@ -1,8 +1,11 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "mustafaaljadery/gemma-2B-10M",
+  "base_model_name_or_path": "google/gemma-3-1b-it",
   "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
@@ -11,24 +14,29 @@
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 8,
+  "lora_bias": false,
   "lora_dropout": 0.0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "qalora_group_size": 16,
   "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "gate_proj",
-    "v_proj",
+    "k_proj",
     "o_proj",
-    "up_proj",
     "down_proj",
-    "k_proj"
+    "q_proj",
+    "v_proj",
+    "gate_proj",
+    "up_proj"
   ],
+  "target_parameters": null,
   "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
   "use_dora": false,
+  "use_qalora": false,
   "use_rslora": false
 }

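For readers reconstructing the setup: the updated JSON corresponds roughly to the `peft.LoraConfig` below. This is a sketch, not the author's training script; fields new in this PEFT version (`corda_config`, `qalora_group_size`, and so on) are simply the library's serialized defaults and are omitted here.

```python
# Sketch: the updated adapter_config.json expressed as a peft.LoraConfig.
from peft import LoraConfig

config = LoraConfig(
    r=8,                # LoRA rank
    lora_alpha=8,       # scaling factor (alpha / r = 1.0)
    lora_dropout=0.0,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[    # all attention and MLP projections
        "k_proj", "o_proj", "down_proj",
        "q_proj", "v_proj", "gate_proj", "up_proj",
    ],
    use_dora=False,
    use_rslora=False,
)
```
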
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e543d3bf88e583c73f5d9d605818cf4076adbb1e7a5eb2f7f56ecc448e20b7cc
-size 39256456
+oid sha256:684fd7ad6b94d5dffa2870e75b48534f34b74495df45687e03adb6e7345bcf96
+size 26139264

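Per the git-lfs v1 spec, the pointer records only the object's sha256 and byte size; the weights themselves (now 26 MB, down from 39 MB with the smaller base model) live in LFS storage. A sketch of checking a downloaded blob against such a pointer (file paths are illustrative):

```python
# Sketch: verify a downloaded LFS object against its pointer file.
import hashlib

def parse_lfs_pointer(path):
    """Parse a git-lfs v1 pointer into its sha256 oid and byte size."""
    fields = dict(line.split(" ", 1) for line in open(path) if " " in line)
    return fields["oid"].strip().removeprefix("sha256:"), int(fields["size"])

def verify(blob_path, pointer_path):
    expected_oid, expected_size = parse_lfs_pointer(pointer_path)
    data = open(blob_path, "rb").read()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid
```
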
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+{
+  "<image_soft_token>": 262144
+}

chat_template.jinja ADDED
@@ -0,0 +1,47 @@
+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+
+' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
+' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model
+'}}
+{%- endif -%}

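This is the stock Gemma 3 chat template: a system message becomes a prefix on the first user turn, the `assistant` role is renamed to `model`, every turn is wrapped in `<start_of_turn>…<end_of_turn>`, and image content items emit `<start_of_image>`. A sketch of rendering it (repo id assumed as above):

```python
# Sketch: render the chat template above and inspect the prompt it produces.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("not-lain/Quotes_Generator")  # assumed repo id
messages = [
    {"role": "system", "content": "You generate short inspirational quotes."},
    {"role": "user", "content": "One quote about persistence, please."},
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# Expected shape, per the template logic:
# <bos><start_of_turn>user
# You generate short inspirational quotes.
#
# One quote about persistence, please.<end_of_turn>
# <start_of_turn>model
```
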
runs/Oct25_18-36-47_afc9ba227e8c/events.out.tfevents.1761417421.afc9ba227e8c.594.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fb001d29ffe30ca47dfacff3eb8299debe85a09be279fde7754c52c6ee3a84a
+size 44801

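The new tfevents file is a standard TensorBoard log. A sketch of reading it back programmatically; the scalar tag name is an assumption (TRL's trainer typically logs `train/loss`):

```python
# Sketch: read the TensorBoard event file added in this commit.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Oct25_18-36-47_afc9ba227e8c")
acc.Reload()
print(acc.Tags()["scalars"])             # available scalar tags
for event in acc.Scalars("train/loss"):  # tag name is an assumption
    print(event.step, event.value)
```
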
special_tokens_map.json CHANGED
@@ -1,8 +1,5 @@
 {
-  "additional_special_tokens": [
-    "<start_of_turn>",
-    "<end_of_turn>"
-  ],
+  "boi_token": "<start_of_image>",
   "bos_token": {
     "content": "<bos>",
     "lstrip": false,
@@ -10,6 +7,7 @@
     "rstrip": false,
     "single_word": false
   },
+  "eoi_token": "<end_of_image>",
   "eos_token": {
     "content": "<eos>",
     "lstrip": false,
@@ -17,6 +15,7 @@
     "rstrip": false,
     "single_word": false
   },
+  "image_token": "<image_soft_token>",
   "pad_token": {
     "content": "<pad>",
     "lstrip": false,

tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0
-size 34356041
+oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
+size 33384568

tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74c699a763d23d0833417bf13c29c44be96fbffa3ab58bbc3a439baf5d89343d
-size 5560
+oid sha256:70e1a64099f950635da1660f6a046f9e158a52e04beb0051ef2ab9f1056e411a
+size 6225
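
`training_args.bin` is the training configuration pickled with `torch.save` by the `Trainer`; the size change just reflects the newer argument set. A sketch of inspecting it, hedged because loading a pickle requires trusting the file:

```python
# Sketch: inspect the pickled training arguments (pickle — only load trusted files).
import torch

args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)  # e.g. TrainingArguments / SFTConfig
print(args.learning_rate, args.per_device_train_batch_size)
```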