apriasmoro
/

8cacfb30-a9e0-4619-b2d9-19939c9c7845

Text Generation

text-generation-inference

Model card Files Files and versions

8cacfb30-a9e0-4619-b2d9-19939c9c7845 / last-checkpoint /trainer_state.json

apriasmoro's picture

Training in progress, step 20, checkpoint

c6088a3 verified 4 months ago

4.01 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.01990049751243781,
	"eval_steps": 500,
	"global_step": 20,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0009950248756218905,
	"grad_norm": 0.8776684999465942,
	"learning_rate": 0.0,
	"loss": 1.6946,
	"step": 1
	},
	{
	"epoch": 0.001990049751243781,
	"grad_norm": 0.9731295704841614,
	"learning_rate": 7e-07,
	"loss": 1.852,
	"step": 2
	},
	{
	"epoch": 0.0029850746268656717,
	"grad_norm": 0.9005614519119263,
	"learning_rate": 1.4e-06,
	"loss": 1.7867,
	"step": 3
	},
	{
	"epoch": 0.003980099502487562,
	"grad_norm": 0.974780261516571,
	"learning_rate": 2.1e-06,
	"loss": 1.7229,
	"step": 4
	},
	{
	"epoch": 0.004975124378109453,
	"grad_norm": 0.711536169052124,
	"learning_rate": 2.8e-06,
	"loss": 1.6955,
	"step": 5
	},
	{
	"epoch": 0.005970149253731343,
	"grad_norm": 0.7311899065971375,
	"learning_rate": 3.5e-06,
	"loss": 1.8664,
	"step": 6
	},
	{
	"epoch": 0.006965174129353234,
	"grad_norm": 0.7126452326774597,
	"learning_rate": 4.2e-06,
	"loss": 1.8133,
	"step": 7
	},
	{
	"epoch": 0.007960199004975124,
	"grad_norm": 0.7019472122192383,
	"learning_rate": 4.9e-06,
	"loss": 1.6724,
	"step": 8
	},
	{
	"epoch": 0.008955223880597015,
	"grad_norm": 0.7028383016586304,
	"learning_rate": 5.6e-06,
	"loss": 1.7337,
	"step": 9
	},
	{
	"epoch": 0.009950248756218905,
	"grad_norm": 0.6948546767234802,
	"learning_rate": 6.299999999999999e-06,
	"loss": 1.6073,
	"step": 10
	},
	{
	"epoch": 0.010945273631840797,
	"grad_norm": 0.6322774291038513,
	"learning_rate": 7e-06,
	"loss": 1.6254,
	"step": 11
	},
	{
	"epoch": 0.011940298507462687,
	"grad_norm": 0.5230722427368164,
	"learning_rate": 7.699999999999999e-06,
	"loss": 1.704,
	"step": 12
	},
	{
	"epoch": 0.012935323383084577,
	"grad_norm": 0.38045769929885864,
	"learning_rate": 8.4e-06,
	"loss": 1.5992,
	"step": 13
	},
	{
	"epoch": 0.013930348258706468,
	"grad_norm": 0.43926432728767395,
	"learning_rate": 9.1e-06,
	"loss": 1.3649,
	"step": 14
	},
	{
	"epoch": 0.014925373134328358,
	"grad_norm": 0.6113471388816833,
	"learning_rate": 9.8e-06,
	"loss": 1.7055,
	"step": 15
	},
	{
	"epoch": 0.015920398009950248,
	"grad_norm": 0.549103856086731,
	"learning_rate": 1.05e-05,
	"loss": 1.6179,
	"step": 16
	},
	{
	"epoch": 0.01691542288557214,
	"grad_norm": 0.39344537258148193,
	"learning_rate": 1.12e-05,
	"loss": 1.517,
	"step": 17
	},
	{
	"epoch": 0.01791044776119403,
	"grad_norm": 0.3620043098926544,
	"learning_rate": 1.19e-05,
	"loss": 1.4598,
	"step": 18
	},
	{
	"epoch": 0.01890547263681592,
	"grad_norm": 0.271251380443573,
	"learning_rate": 1.2599999999999998e-05,
	"loss": 1.441,
	"step": 19
	},
	{
	"epoch": 0.01990049751243781,
	"grad_norm": 0.2406337559223175,
	"learning_rate": 1.33e-05,
	"loss": 1.5409,
	"step": 20
	}
	],
	"logging_steps": 1,
	"max_steps": 20,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 100,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.5380973908656128e+16,
	"train_batch_size": 24,
	"trial_name": null,
	"trial_params": null
	}