Upload folder using huggingface_hub
Browse files
- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +7 -21
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -20,10 +20,10 @@
|
|
| 20 |
"rank_pattern": {},
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
| 23 |
-
"qkv_proj",
|
| 24 |
"gate_up_proj",
|
| 25 |
-
"
|
| 26 |
-
"
|
|
|
|
| 27 |
],
|
| 28 |
"task_type": "CAUSAL_LM",
|
| 29 |
"use_dora": false,
|
|
|
|
| 20 |
"rank_pattern": {},
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
|
|
|
| 23 |
"gate_up_proj",
|
| 24 |
+
"down_proj",
|
| 25 |
+
"qkv_proj",
|
| 26 |
+
"o_proj"
|
| 27 |
],
|
| 28 |
"task_type": "CAUSAL_LM",
|
| 29 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 805341552
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2b650eafd945cf76bffa8082697fd3ff833a2acb3c1f999087552eb6809ceb6
|
| 3 |
size 805341552
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 409252709
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbea9603a80fb64b1c940be7e3d68c3c2149fd9d9ead8a78bab625f3cb19fadf
|
| 3 |
size 409252709
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef51e16c6a17540db1d871c8a331f96802a104e1203e4600237ca5e9b68c6be2
|
| 3 |
size 14645
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:929791789119226e566ddaa2c943c7df8db37a661c8a6885321f70964d94fc76
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -3,35 +3,21 @@
|
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
"epoch": 2.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step": 60,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
-
{
|
| 12 |
-
"epoch": 0.6666666666666666,
|
| 13 |
-
"grad_norm": 0.43741512298583984,
|
| 14 |
-
"learning_rate": 0.0003880691476669283,
|
| 15 |
-
"loss": 1.4419,
|
| 16 |
-
"step": 20
|
| 17 |
-
},
|
| 18 |
{
|
| 19 |
"epoch": 1.3333333333333333,
|
| 20 |
-
"grad_norm": 0.
|
| 21 |
-
"learning_rate": 0.
|
| 22 |
-
"loss": 1.
|
| 23 |
-
"step": 40
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"epoch": 2.0,
|
| 27 |
-
"grad_norm": 0.2239806205034256,
|
| 28 |
-
"learning_rate": 0.00016631534900011212,
|
| 29 |
-
"loss": 1.0725,
|
| 30 |
-
"step": 60
|
| 31 |
}
|
| 32 |
],
|
| 33 |
"logging_steps": 20,
|
| 34 |
-
"max_steps": 90,
|
| 35 |
"num_input_tokens_seen": 0,
|
| 36 |
"num_train_epochs": 3,
|
| 37 |
"save_steps": 500,
|
|
@@ -47,7 +33,7 @@
|
|
| 47 |
"attributes": {}
|
| 48 |
}
|
| 49 |
},
|
| 50 |
-
"total_flos":
|
| 51 |
"train_batch_size": 4,
|
| 52 |
"trial_name": null,
|
| 53 |
"trial_params": null
|
|
|
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
"epoch": 2.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 30,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
{
|
| 12 |
"epoch": 1.3333333333333333,
|
| 13 |
+
"grad_norm": 0.3045049011707306,
|
| 14 |
+
"learning_rate": 0.0002838031430723217,
|
| 15 |
+
"loss": 1.2965,
|
| 16 |
+
"step": 20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
}
|
| 18 |
],
|
| 19 |
"logging_steps": 20,
|
| 20 |
+
"max_steps": 45,
|
| 21 |
"num_input_tokens_seen": 0,
|
| 22 |
"num_train_epochs": 3,
|
| 23 |
"save_steps": 500,
|
|
|
|
| 33 |
"attributes": {}
|
| 34 |
}
|
| 35 |
},
|
| 36 |
+
"total_flos": 2.8436606201880576e+16,
|
| 37 |
"train_batch_size": 4,
|
| 38 |
"trial_name": null,
|
| 39 |
"trial_params": null
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5905
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e26a59f4d05a449a690d742d95dd40bfe537cb91ad5617a10895ea855b3bba8f
|
| 3 |
size 5905
|