Model save

Files changed (4) hide show

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.0022241791886344797,
-    "train_runtime": 11520.3381,
     "train_samples": 374,
-    "train_samples_per_second": 0.26,
-    "train_steps_per_second": 0.043
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.019628612981648565,
+    "train_runtime": 7912.6404,
     "train_samples": 374,
+    "train_samples_per_second": 0.379,
+    "train_steps_per_second": 0.063
 }

config.json CHANGED Viewed

@@ -25,6 +25,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.48.1",
-  "use_cache": false,
   "vocab_size": 32016
 }

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.48.1",
+  "use_cache": true,
   "vocab_size": 32016
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.0022241791886344797,
-    "train_runtime": 11520.3381,
     "train_samples": 374,
-    "train_samples_per_second": 0.26,
-    "train_steps_per_second": 0.043
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.019628612981648565,
+    "train_runtime": 7912.6404,
     "train_samples": 374,
+    "train_samples_per_second": 0.379,
+    "train_steps_per_second": 0.063
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff