Commit
·
ef4a3ca
1
Parent(s):
751f3ef
Training in progress, step 500
Browse files- all_results.json +9 -9
- post-training eval_results.json +5 -5
- prediction_output.jsonl +1 -1
- pytorch_model.bin +1 -1
- train_results.json +4 -4
- trainer_state.json +186 -186
- training_args.bin +1 -1
all_results.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 5.0,
|
| 3 |
"eval_samples": 98,
|
| 4 |
-
"test_f1": 0.
|
| 5 |
-
"test_loss": 1.
|
| 6 |
-
"test_runtime": 3.
|
| 7 |
-
"test_samples_per_second": 29.
|
| 8 |
-
"test_steps_per_second": 29.
|
| 9 |
-
"train_loss": 0.
|
| 10 |
-
"train_runtime":
|
| 11 |
"train_samples": 702,
|
| 12 |
-
"train_samples_per_second": 4.
|
| 13 |
-
"train_steps_per_second": 4.
|
| 14 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 5.0,
|
| 3 |
"eval_samples": 98,
|
| 4 |
+
"test_f1": 0.5515045914952008,
|
| 5 |
+
"test_loss": 1.7193970680236816,
|
| 6 |
+
"test_runtime": 3.325,
|
| 7 |
+
"test_samples_per_second": 29.474,
|
| 8 |
+
"test_steps_per_second": 29.474,
|
| 9 |
+
"train_loss": 0.6706694952103487,
|
| 10 |
+
"train_runtime": 824.1732,
|
| 11 |
"train_samples": 702,
|
| 12 |
+
"train_samples_per_second": 4.259,
|
| 13 |
+
"train_steps_per_second": 4.259
|
| 14 |
}
|
post-training eval_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"eval_samples": 98,
|
| 3 |
-
"test_f1": 0.
|
| 4 |
-
"test_loss": 1.
|
| 5 |
-
"test_runtime": 3.
|
| 6 |
-
"test_samples_per_second": 29.
|
| 7 |
-
"test_steps_per_second": 29.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"eval_samples": 98,
|
| 3 |
+
"test_f1": 0.5515045914952008,
|
| 4 |
+
"test_loss": 1.7193970680236816,
|
| 5 |
+
"test_runtime": 3.325,
|
| 6 |
+
"test_samples_per_second": 29.474,
|
| 7 |
+
"test_steps_per_second": 29.474
|
| 8 |
}
|
prediction_output.jsonl
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
[{"pred": 10.053011894226074, "label": 0.0}, {"pred": 1.2265956401824951, "label": 5.0}, {"pred": -0.5370747447013855, "label": 2.0}, {"pred": 0.1752518266439438, "label": 4.0}, {"pred": -2.031033754348755, "label": 2.0}, {"pred": -3.3496036529541016, "label": 8.0}, {"pred": -1.9880871772766113, "label": 8.0}, {"pred": -3.086259126663208, "label": 2.0}, {"pred": -1.1470110416412354, "label": 8.0}, {"pred": 1.037685513496399, "label": 2.0}, {"pred": -1.913681149482727, "label": 2.0}, {"pred": 0.26967066526412964, "label": 2.0}, {"pred": 5.334761142730713, "label": 5.0}, {"pred": -1.662973165512085, "label": 5.0}, {"pred": 2.8667726516723633, "label": 5.0}, {"pred": -0.8025294542312622, "label": 5.0}, {"pred": -1.7527247667312622, "label": 5.0}, {"pred": -3.357849359512329, "label": 6.0}, {"pred": 1.181986927986145, "label": 6.0}, {"pred": 1.7410268783569336, "label": 5.0}, {"pred": 7.933586597442627, "label": 8.0}, {"pred": 3.3055169582366943, "label": 8.0}, {"pred": -2.8542656898498535, "label": 2.0}, {"pred": -1.19741690158844, "label": 5.0}, {"pred": -4.84285306930542, "label": 5.0}, {"pred": -2.683229923248291, "label": 5.0}, {"pred": -2.0367188453674316, "label": 2.0}, {"pred": -0.3946422338485718, "label": 2.0}, {"pred": 1.2412188053131104, "label": 2.0}, {"pred": 8.913151741027832, "label": 3.0}, {"pred": 2.1114447116851807, "label": 3.0}, {"pred": -0.3422248363494873, "label": 5.0}, {"pred": -1.5180871486663818, "label": 0.0}, {"pred": -3.858212471008301, "label": 2.0}, {"pred": -3.058220624923706, "label": 3.0}, {"pred": -2.8904712200164795, "label": 3.0}, {"pred": -3.0549261569976807, "label": 3.0}, {"pred": -0.9003552794456482, "label": 7.0}, {"pred": 9.505762100219727, "label": 8.0}, {"pred": -0.5478419065475464, "label": 7.0}, {"pred": -0.37932345271110535, "label": 7.0}, {"pred": -0.5652313828468323, "label": 2.0}, {"pred": -3.1210832595825195, "label": 2.0}, {"pred": -2.2201759815216064, "label": 3.0}, {"pred": -0.22374841570854187, "label": 8.0}, {"pred": -0.4147443473339081, "label": 0.0}, {"pred": -2.522362232208252, "label": 2.0}, {"pred": -0.3626328706741333, "label": 2.0}, {"pred": -0.5300700664520264, "label": 2.0}, {"pred": -0.03919024020433426, "label": 2.0}, {"pred": -0.5216307044029236, "label": 2.0}, {"pred": -2.4257967472076416, "label": 2.0}, {"pred": -0.43984726071357727, "label": 0.0}, {"pred": 7.890976428985596, "label": 7.0}, {"pred": -0.8893976211547852, "label": 7.0}, {"pred": -2.06447434425354, "label": 7.0}, {"pred": -1.5249279737472534, "label": 7.0}, {"pred": -0.5461889505386353, "label": 8.0}, {"pred": -0.3898739814758301, "label": 8.0}, {"pred": -0.2674681544303894, "label": 8.0}, {"pred": -1.5843223333358765, "label": 3.0}, {"pred": -0.9582051038742065, "label": 6.0}, {"pred": 10.02863597869873, "label": 0.0}, {"pred": 2.1390559673309326, "label": 2.0}, {"pred": 0.5683819651603699, "label": 3.0}, {"pred": 7.809494495391846, "label": 7.0}, {"pred": 4.672691345214844, "label": 0.0}, {"pred": -3.1617870330810547, "label": 5.0}, {"pred": -2.968081474304199, "label": 2.0}, {"pred": -4.236698627471924, "label": 2.0}, {"pred": -2.237872362136841, "label": 7.0}, {"pred": -2.4456675052642822, "label": 2.0}, {"pred": 0.0029354728758335114, "label": 2.0}, {"pred": -1.3975402116775513, "label": 2.0}, {"pred": -0.45201340317726135, "label": 8.0}, {"pred": -0.5411576628684998, "label": 8.0}, {"pred": -1.1021785736083984, "label": 3.0}, {"pred": -0.03798329457640648, "label": 0.0}, {"pred": -2.4011411666870117, "label": 0.0}, {"pred": -1.8443877696990967, "label": 2.0}, {"pred": 9.534435272216797, "label": 2.0}, {"pred": -2.261758804321289, "label": 2.0}, {"pred": -0.29918554425239563, "label": 8.0}, {"pred": 9.580008506774902, "label": 6.0}, {"pred": -1.6922998428344727, "label": 2.0}, {"pred": -0.11596175283193588, "label": 2.0}, {"pred": -1.1728023290634155, "label": 8.0}, {"pred": -2.624927282333374, "label": 0.0}, {"pred": -2.2074270248413086, "label": 2.0}, {"pred": 0.1966685950756073, "label": 3.0}, {"pred": -2.8291454315185547, "label": 3.0}, {"pred": -1.2406705617904663, "label": 2.0}, {"pred": 7.857271671295166, "label": 6.0}, {"pred": 3.400002956390381, "label": 0.0}, {"pred": -1.4394145011901855, "label": 0.0}, {"pred": 1.8180431127548218, "label": 5.0}, {"pred": -2.2545340061187744, "label": 5.0}, {"pred": -2.8155367374420166, "label": 5.0}, {"pred": -2.5808486938476562, "label": 5.0}, {"pred": 0.629129946231842, "label": 5.0}, {"pred": -0.1851099729537964, "label": 2.0}, {"pred": 0.6262603998184204, "label": 2.0}, {"pred": 0.04001408815383911, "label": 2.0}, {"pred": -2.094447612762451, "label": 5.0}, {"pred": 1.2242151498794556, "label": 4.0}, {"pred": 6.133328914642334, "label": 8.0}, {"pred": -3.5184497833251953, "label": 4.0}, {"pred": -2.7859067916870117, "label": 4.0}, {"pred": -2.7572460174560547, "label": 4.0}, {"pred": -2.47908616065979, "label": 8.0}, {"pred": -0.5159931182861328, "label": 2.0}, {"pred": 0.07365216314792633, "label": 3.0}, {"pred": 4.490240573883057, "label": 2.0}, {"pred": 6.107156276702881, "label": 5.0}, {"pred": -2.4094760417938232, "label": 5.0}, {"pred": -2.298379898071289, "label": 5.0}, {"pred": -3.0487470626831055, "label": 0.0}, {"pred": -1.3762894868850708, "label": 6.0}, {"pred": -2.8625693321228027, "label": 5.0}, {"pred": -1.4980090856552124, "label": 5.0}, {"pred": -0.9184617400169373, "label": 5.0}, {"pred": -0.7868920564651489, "label": 2.0}, {"pred": 4.807824611663818, "label": 2.0}, {"pred": 4.986772060394287, "label": 2.0}, {"pred": -2.0342037677764893, "label": 2.0}, {"pred": -0.8398276567459106, "label": 2.0}, {"pred": -1.2112452983856201, "label": 2.0}, {"pred": -2.7561631202697754, "label": 5.0}, {"pred": -2.6678245067596436, "label": 5.0}, {"pred": -0.8507716059684753, "label": 6.0}, {"pred": 1.9779833555221558, "label": 6.0}, {"pred": 7.1783857345581055, "label": 0.0}, {"pred": 2.3628337383270264, "label": 2.0}, {"pred": -1.8503243923187256, "label": 2.0}, {"pred": -3.973742961883545, "label": 6.0}, {"pred": -0.961530327796936, "label": 2.0}, {"pred": -2.3103420734405518, "label": 5.0}, {"pred": 0.1533740609884262, "label": 5.0}, {"pred": 0.945532500743866, "label": 6.0}, {"pred": -1.9985706806182861, "label": 2.0}, {"pred": 2.2117724418640137, "label": 5.0}, {"pred": 6.319922924041748, "label": 2.0}, {"pred": -2.8318819999694824, "label": 8.0}, {"pred": -3.428406238555908, "label": 2.0}, {"pred": -2.51615047454834, "label": 2.0}, {"pred": -2.9052319526672363, "label": 5.0}, {"pred": 0.7762099504470825, "label": 8.0}, {"pred": 0.3957656919956207, "label": 5.0}, {"pred": -1.3153777122497559, "label": 5.0}, {"pred": 4.028310298919678, "label": 5.0}, {"pred": 5.811086177825928, "label": 5.0}, {"pred": -2.535435914993286, "label": 5.0}, {"pred": -3.497481107711792, "label": 6.0}, {"pred": 0.04644595459103584, "label": 8.0}, {"pred": -0.3115100562572479, "label": 6.0}, {"pred": 0.016584614291787148, "label": 7.0}, {"pred": -0.7723070979118347, "label": 6.0}, {"pred": -1.5444141626358032, "label": 8.0}, {"pred": -1.141904354095459, "label": 0.0}, {"pred": 8.480439186096191, "label": 2.0}, {"pred": -2.3408584594726562, "label": 6.0}, {"pred": -1.7245216369628906, "label": 5.0}, {"pred": 1.5875722169876099, "label": 5.0}, {"pred": -0.9754578471183777, "label": 2.0}, {"pred": -2.217231035232544, "label": 2.0}, {"pred": -0.6310365796089172, "label": 8.0}, {"pred": -0.9391398429870605, "label": 8.0}, {"pred": -1.012092113494873, "label": 0.0}, {"pred": 8.798238754272461, "label": 2.0}, {"pred": -3.1681442260742188, "label": 0.0}, {"pred": -0.884898841381073, "label": 7.0}, {"pred": 1.2964022159576416, "label": 2.0}, {"pred": -2.4020252227783203, "label": 2.0}, {"pred": -3.692897319793701, "label": 8.0}, {"pred": 0.48183152079582214, "label": 6.0}, {"pred": -0.42652153968811035, "label": 6.0}, {"pred": 1.3932526111602783, "label": 5.0}, {"pred": 7.640524864196777, "label": 5.0}, {"pred": -3.08962082862854, "label": 5.0}, {"pred": -1.635117530822754, "label": 5.0}, {"pred": -0.565514326095581, "label": 5.0}, {"pred": -2.547136068344116, "label": 5.0}, {"pred": -0.6833630204200745, "label": 5.0}, {"pred": -0.7127103209495544, "label": 6.0}, {"pred": -0.14554797112941742, "label": 8.0}, {"pred": -0.12966954708099365, "label": 5.0}, {"pred": -2.2960150241851807, "label": 5.0}, {"pred": -0.6724411249160767, "label": 3.0}, {"pred": 8.128778457641602, "label": 3.0}, {"pred": -1.071781873703003, "label": 6.0}, {"pred": -1.9194085597991943, "label": 2.0}, {"pred": -1.54249906539917, "label": 3.0}, {"pred": -0.6316580176353455, "label": 2.0}, {"pred": -0.36814042925834656, "label": 2.0}, {"pred": -0.3363884389400482, "label": 2.0}, {"pred": -1.6689341068267822, "label": 0.0}, {"pred": -1.132188320159912, "label": 2.0}, {"pred": 10.208016395568848, "label": 5.0}, {"pred": -0.5401054620742798, "label": 5.0}, {"pred": 0.09691311419010162, "label": 5.0}, {"pred": 8.53925609588623, "label": 2.0}, {"pred": 4.391894340515137, "label": 6.0}, {"pred": -3.2388341426849365, "label": 3.0}, {"pred": -2.586022138595581, "label": 3.0}, {"pred": -1.3518240451812744, "label": 6.0}, {"pred": -2.103663444519043, "label": 3.0}, {"pred": -1.933193564414978, "label": 2.0}, {"pred": 1.1616668701171875, "label": 8.0}, {"pred": -1.3652011156082153, "label": 8.0}, {"pred": -4.48310661315918, "label": 0.0}, {"pred": -0.6386572122573853, "label": 1.0}, {"pred": 0.311795175075531, "label": 2.0}, {"pred": 2.243385076522827, "label": 3.0}, {"pred": 7.526029109954834, "label": 6.0}, {"pred": -3.0103445053100586, "label": 8.0}, {"pred": -1.4424251317977905, "label": 7.0}, {"pred": -2.952651262283325, "label": 0.0}, {"pred": -0.9500249028205872, "label": 8.0}, {"pred": 1.3159178495407104, "label": 2.0}, {"pred": 1.183246374130249, "label": 2.0}, {"pred": -2.141357183456421, "label": 5.0}, {"pred": 7.404512405395508, "label": 5.0}, {"pred": 1.0727956295013428, "label": 7.0}, {"pred": -3.514127492904663, "label": 2.0}, {"pred": -3.1212284564971924, "label": 2.0}, {"pred": 0.5527191758155823, "label": 8.0}, {"pred": -0.5971601605415344, "label": 5.0}, {"pred": 5.6161651611328125, "label": 5.0}, {"pred": 4.333760738372803, "label": 2.0}, {"pred": -3.9194581508636475, "label": 6.0}, {"pred": 2.415543556213379, "label": 6.0}, {"pred": -3.0811643600463867, "label": 2.0}, {"pred": -3.876741647720337, "label": 5.0}, {"pred": -1.4490783214569092, "label": 2.0}, {"pred": -2.84633207321167, "label": 5.0}, {"pred": -0.467058002948761, "label": 5.0}, {"pred": 9.251620292663574, "label": 8.0}, {"pred": -1.4969630241394043, "label": 5.0}, {"pred": -0.1453137993812561, "label": 2.0}, {"pred": -0.724045991897583, "label": 6.0}, {"pred": -2.866516351699829, "label": 6.0}, {"pred": -2.0785751342773438, "label": 5.0}, {"pred": 0.3289041221141815, "label": 6.0}, {"pred": -1.6521097421646118, "label": 2.0}, {"pred": -0.6320829391479492, "label": 2.0}, {"pred": 7.514566421508789, "label": 5.0}, {"pred": 4.6928019523620605, "label": 5.0}, {"pred": -3.0128238201141357, "label": 0.0}, {"pred": -1.0718092918395996, "label": 5.0}, {"pred": -3.47845721244812, "label": 2.0}, {"pred": 0.35053759813308716, "label": 2.0}, {"pred": -0.9066526293754578, "label": 2.0}, {"pred": -4.176146984100342, "label": 5.0}, {"pred": -2.598372220993042, "label": 2.0}, {"pred": 7.388974666595459, "label": 6.0}, {"pred": -0.4163927137851715, "label": 6.0}, {"pred": -1.299724817276001, "label": 6.0}, {"pred": 4.256181716918945, "label": 6.0}, {"pred": -0.24266250431537628, "label": 5.0}, {"pred": -2.206273317337036, "label": 5.0}, {"pred": -1.403360366821289, "label": 5.0}, {"pred": -2.5257246494293213, "label": 5.0}, {"pred": -2.7049272060394287, "label": 5.0}, {"pred": 4.336051940917969, "label": 2.0}, {"pred": 6.037196636199951, "label": 5.0}, {"pred": -3.0442888736724854, "label": 8.0}, {"pred": 1.1787348985671997, "label": 8.0}, {"pred": -0.4310672879219055, "label": 0.0}, {"pred": -0.17733784019947052, "label": 3.0}, {"pred": -1.1147769689559937, "label": 2.0}, {"pred": 0.8757383823394775, "label": 2.0}, {"pred": -1.8259029388427734, "label": 2.0}, {"pred": 0.1079450473189354, "label": 8.0}, {"pred": -1.332146406173706, "label": 8.0}, {"pred": -1.5691189765930176, "label": 0.0}, {"pred": 8.07284164428711, "label": 2.0}, {"pred": 1.6381406784057617, "label": 0.0}, {"pred": -2.8433923721313477, "label": 7.0}, {"pred": -3.629115581512451, "label": 0.0}, {"pred": -4.5651140213012695, "label": 0.0}, {"pred": -1.93071448802948, "label": 0.0}, {"pred": 1.3184444904327393, "label": 8.0}, {"pred": 1.7017006874084473, "label": 8.0}, {"pred": -1.1318089962005615, "label": 0.0}, {"pred": 6.206396579742432, "label": 0.0}, {"pred": 2.876361131668091, "label": 2.0}, {"pred": -1.8594180345535278, "label": 2.0}, {"pred": -2.608898878097534, "label": 0.0}]
|
|
|
|
| 1 |
+
[{"pred": 10.290809631347656, "label": 0.0}, {"pred": 0.9703102111816406, "label": 5.0}, {"pred": -1.5968129634857178, "label": 2.0}, {"pred": 0.7235668897628784, "label": 4.0}, {"pred": -3.0565438270568848, "label": 2.0}, {"pred": -2.3618180751800537, "label": 8.0}, {"pred": -1.6507198810577393, "label": 8.0}, {"pred": -2.724348783493042, "label": 2.0}, {"pred": -0.7913642525672913, "label": 8.0}, {"pred": -0.1787530481815338, "label": 2.0}, {"pred": -3.9603071212768555, "label": 2.0}, {"pred": 0.03501487895846367, "label": 2.0}, {"pred": 6.6571478843688965, "label": 5.0}, {"pred": -2.7573740482330322, "label": 5.0}, {"pred": 2.7549917697906494, "label": 5.0}, {"pred": -0.9166736602783203, "label": 5.0}, {"pred": 1.1418240070343018, "label": 5.0}, {"pred": -1.7485498189926147, "label": 6.0}, {"pred": 1.0153459310531616, "label": 6.0}, {"pred": 2.065824508666992, "label": 5.0}, {"pred": 7.917908191680908, "label": 8.0}, {"pred": 2.5800442695617676, "label": 8.0}, {"pred": -3.0683484077453613, "label": 2.0}, {"pred": 0.3347143530845642, "label": 5.0}, {"pred": -2.8323209285736084, "label": 5.0}, {"pred": -2.9949421882629395, "label": 5.0}, {"pred": -3.9032864570617676, "label": 2.0}, {"pred": -0.9693252444267273, "label": 2.0}, {"pred": 0.9653197526931763, "label": 2.0}, {"pred": 8.178954124450684, "label": 3.0}, {"pred": 3.3416378498077393, "label": 3.0}, {"pred": -1.3156547546386719, "label": 5.0}, {"pred": -1.044263243675232, "label": 0.0}, {"pred": -3.586060047149658, "label": 2.0}, {"pred": -1.6682804822921753, "label": 3.0}, {"pred": -3.6979148387908936, "label": 3.0}, {"pred": -3.4306604862213135, "label": 3.0}, {"pred": -0.3065505027770996, "label": 7.0}, {"pred": 10.175311088562012, "label": 8.0}, {"pred": -2.8946011066436768, "label": 7.0}, {"pred": -0.09801101684570312, "label": 7.0}, {"pred": 0.9370686411857605, "label": 2.0}, {"pred": -1.5044194459915161, "label": 2.0}, {"pred": -1.840678334236145, "label": 3.0}, {"pred": -0.3210545480251312, "label": 8.0}, {"pred": -1.5668078660964966, "label": 0.0}, {"pred": -1.9491629600524902, "label": 2.0}, {"pred": -0.9471103549003601, "label": 2.0}, {"pred": -1.2616881132125854, "label": 2.0}, {"pred": -1.0521950721740723, "label": 2.0}, {"pred": -0.00420457124710083, "label": 2.0}, {"pred": -1.8757306337356567, "label": 2.0}, {"pred": -1.201255440711975, "label": 0.0}, {"pred": 7.710162162780762, "label": 7.0}, {"pred": -2.4759974479675293, "label": 7.0}, {"pred": -2.5288870334625244, "label": 7.0}, {"pred": -0.9985426664352417, "label": 7.0}, {"pred": -1.529648780822754, "label": 8.0}, {"pred": -1.32073974609375, "label": 8.0}, {"pred": -0.8256033062934875, "label": 8.0}, {"pred": -1.5793683528900146, "label": 3.0}, {"pred": -1.1773693561553955, "label": 6.0}, {"pred": 9.405431747436523, "label": 0.0}, {"pred": 3.3469057083129883, "label": 2.0}, {"pred": 1.315601110458374, "label": 3.0}, {"pred": 6.369998931884766, "label": 7.0}, {"pred": 5.687730312347412, "label": 0.0}, {"pred": -3.7174370288848877, "label": 5.0}, {"pred": -3.179921865463257, "label": 2.0}, {"pred": -3.663841724395752, "label": 2.0}, {"pred": -1.97105872631073, "label": 7.0}, {"pred": -3.0646395683288574, "label": 2.0}, {"pred": -1.2003767490386963, "label": 2.0}, {"pred": -1.9717252254486084, "label": 2.0}, {"pred": -0.3366681635379791, "label": 8.0}, {"pred": -1.3070366382598877, "label": 8.0}, {"pred": -0.8485084176063538, "label": 3.0}, {"pred": -0.20988516509532928, "label": 0.0}, {"pred": -2.292656660079956, "label": 0.0}, {"pred": -2.090421438217163, "label": 2.0}, {"pred": 9.354241371154785, "label": 2.0}, {"pred": -3.073307514190674, "label": 2.0}, {"pred": 0.21364711225032806, "label": 8.0}, {"pred": 9.965922355651855, "label": 6.0}, {"pred": -3.118712902069092, "label": 2.0}, {"pred": -0.03438292443752289, "label": 2.0}, {"pred": 0.2002769261598587, "label": 8.0}, {"pred": -1.063583254814148, "label": 0.0}, {"pred": -1.177560567855835, "label": 2.0}, {"pred": 0.24500791728496552, "label": 3.0}, {"pred": -2.689775228500366, "label": 3.0}, {"pred": -1.0503861904144287, "label": 2.0}, {"pred": 7.594336032867432, "label": 6.0}, {"pred": 2.1890816688537598, "label": 0.0}, {"pred": -1.7860941886901855, "label": 0.0}, {"pred": 5.3757171630859375, "label": 5.0}, {"pred": -1.2799867391586304, "label": 5.0}, {"pred": -2.6802003383636475, "label": 5.0}, {"pred": -3.188868761062622, "label": 5.0}, {"pred": 1.7214399576187134, "label": 5.0}, {"pred": -1.2691649198532104, "label": 2.0}, {"pred": 0.995307207107544, "label": 2.0}, {"pred": 5.972494125366211, "label": 2.0}, {"pred": -3.2425472736358643, "label": 5.0}, {"pred": 0.8252993226051331, "label": 4.0}, {"pred": 0.8438105583190918, "label": 8.0}, {"pred": -1.5899057388305664, "label": 4.0}, {"pred": -3.049546241760254, "label": 4.0}, {"pred": -1.266801357269287, "label": 4.0}, {"pred": -1.5122487545013428, "label": 8.0}, {"pred": -1.030104160308838, "label": 2.0}, {"pred": -0.673987627029419, "label": 3.0}, {"pred": 5.558537483215332, "label": 2.0}, {"pred": 6.735321998596191, "label": 5.0}, {"pred": -0.7411388158798218, "label": 5.0}, {"pred": -1.8602837324142456, "label": 5.0}, {"pred": -2.5063188076019287, "label": 0.0}, {"pred": -0.9122320413589478, "label": 6.0}, {"pred": -2.421311616897583, "label": 5.0}, {"pred": -1.025620937347412, "label": 5.0}, {"pred": -1.5433242321014404, "label": 5.0}, {"pred": -0.10171565413475037, "label": 2.0}, {"pred": 2.286135673522949, "label": 2.0}, {"pred": 7.0758748054504395, "label": 2.0}, {"pred": -2.814152240753174, "label": 2.0}, {"pred": 0.2042846530675888, "label": 2.0}, {"pred": -0.3653508126735687, "label": 2.0}, {"pred": -1.995278000831604, "label": 5.0}, {"pred": -1.831222653388977, "label": 5.0}, {"pred": -1.064313292503357, "label": 6.0}, {"pred": 2.56392765045166, "label": 6.0}, {"pred": 8.376298904418945, "label": 0.0}, {"pred": 1.3188626766204834, "label": 2.0}, {"pred": -2.23551607131958, "label": 2.0}, {"pred": -2.767118215560913, "label": 6.0}, {"pred": -0.0966845378279686, "label": 2.0}, {"pred": -1.313618540763855, "label": 5.0}, {"pred": -0.4058559834957123, "label": 5.0}, {"pred": 1.0895838737487793, "label": 6.0}, {"pred": -2.1371374130249023, "label": 2.0}, {"pred": 1.4900816679000854, "label": 5.0}, {"pred": 6.579888343811035, "label": 2.0}, {"pred": -3.2009799480438232, "label": 8.0}, {"pred": -2.073420524597168, "label": 2.0}, {"pred": -1.242249608039856, "label": 2.0}, {"pred": -1.9888185262680054, "label": 5.0}, {"pred": 0.48061397671699524, "label": 8.0}, {"pred": 0.4645574390888214, "label": 5.0}, {"pred": -1.0836477279663086, "label": 5.0}, {"pred": 2.5422253608703613, "label": 5.0}, {"pred": 6.876040458679199, "label": 5.0}, {"pred": -3.140732765197754, "label": 5.0}, {"pred": -2.5778374671936035, "label": 6.0}, {"pred": 0.025611255317926407, "label": 8.0}, {"pred": 1.1743779182434082, "label": 6.0}, {"pred": 0.28550976514816284, "label": 7.0}, {"pred": -0.5769954323768616, "label": 6.0}, {"pred": -1.9950687885284424, "label": 8.0}, {"pred": -0.8799560070037842, "label": 0.0}, {"pred": 7.703958511352539, "label": 2.0}, {"pred": -1.9189386367797852, "label": 6.0}, {"pred": -3.1383209228515625, "label": 5.0}, {"pred": 2.151003837585449, "label": 5.0}, {"pred": -0.1051066666841507, "label": 2.0}, {"pred": -2.95643949508667, "label": 2.0}, {"pred": -0.4773956835269928, "label": 8.0}, {"pred": -1.499645471572876, "label": 8.0}, {"pred": -0.9763684868812561, "label": 0.0}, {"pred": 8.102483749389648, "label": 2.0}, {"pred": -2.9032540321350098, "label": 0.0}, {"pred": -1.7431988716125488, "label": 7.0}, {"pred": 3.5589823722839355, "label": 2.0}, {"pred": -1.2039445638656616, "label": 2.0}, {"pred": -4.388154029846191, "label": 8.0}, {"pred": 0.5220628380775452, "label": 6.0}, {"pred": -0.9276381134986877, "label": 6.0}, {"pred": 0.7515405416488647, "label": 5.0}, {"pred": 6.313348770141602, "label": 5.0}, {"pred": -3.2387163639068604, "label": 5.0}, {"pred": -1.8151580095291138, "label": 5.0}, {"pred": -1.4219468832015991, "label": 5.0}, {"pred": -1.951231598854065, "label": 5.0}, {"pred": -1.1042416095733643, "label": 5.0}, {"pred": -1.4297571182250977, "label": 6.0}, {"pred": -1.0193731784820557, "label": 8.0}, {"pred": -0.10052667558193207, "label": 5.0}, {"pred": -1.7839338779449463, "label": 5.0}, {"pred": -1.3848637342453003, "label": 3.0}, {"pred": 7.9457926750183105, "label": 3.0}, {"pred": -2.433159828186035, "label": 6.0}, {"pred": -2.399423122406006, "label": 2.0}, {"pred": -0.9965806007385254, "label": 3.0}, {"pred": -1.350462794303894, "label": 2.0}, {"pred": -1.4989750385284424, "label": 2.0}, {"pred": -1.0299936532974243, "label": 2.0}, {"pred": -1.6915379762649536, "label": 0.0}, {"pred": -1.2833725214004517, "label": 2.0}, {"pred": 9.242444038391113, "label": 5.0}, {"pred": -1.860719919204712, "label": 5.0}, {"pred": 0.09586504101753235, "label": 5.0}, {"pred": 8.429362297058105, "label": 2.0}, {"pred": 4.24113655090332, "label": 6.0}, {"pred": -3.6316137313842773, "label": 3.0}, {"pred": -1.099712610244751, "label": 3.0}, {"pred": -0.07706806808710098, "label": 6.0}, {"pred": -2.187944173812866, "label": 3.0}, {"pred": -3.425537586212158, "label": 2.0}, {"pred": 1.153124451637268, "label": 8.0}, {"pred": -1.4549442529678345, "label": 8.0}, {"pred": -2.9547150135040283, "label": 0.0}, {"pred": 0.5087819695472717, "label": 1.0}, {"pred": -0.8468652367591858, "label": 2.0}, {"pred": 4.418088912963867, "label": 3.0}, {"pred": 6.098045349121094, "label": 6.0}, {"pred": -3.040365219116211, "label": 8.0}, {"pred": -2.9563238620758057, "label": 7.0}, {"pred": -1.0640069246292114, "label": 0.0}, {"pred": 0.4458455443382263, "label": 8.0}, {"pred": 2.9810967445373535, "label": 2.0}, {"pred": 1.6232993602752686, "label": 2.0}, {"pred": -2.709993600845337, "label": 5.0}, {"pred": 6.805906772613525, "label": 5.0}, {"pred": 2.1797492504119873, "label": 7.0}, {"pred": -4.349334239959717, "label": 2.0}, {"pred": -4.006707191467285, "label": 2.0}, {"pred": -2.190566301345825, "label": 8.0}, {"pred": -0.9495126008987427, "label": 5.0}, {"pred": 7.999160289764404, "label": 5.0}, {"pred": 3.7388970851898193, "label": 2.0}, {"pred": -2.9454569816589355, "label": 6.0}, {"pred": 2.977922201156616, "label": 6.0}, {"pred": -1.7080835103988647, "label": 2.0}, {"pred": -2.4555013179779053, "label": 5.0}, {"pred": -2.324666738510132, "label": 2.0}, {"pred": -3.444615364074707, "label": 5.0}, {"pred": 0.11816424131393433, "label": 5.0}, {"pred": 9.905989646911621, "label": 8.0}, {"pred": -2.962965488433838, "label": 5.0}, {"pred": 0.030992530286312103, "label": 2.0}, {"pred": 0.42090052366256714, "label": 6.0}, {"pred": -1.4415723085403442, "label": 6.0}, {"pred": -1.0313920974731445, "label": 5.0}, {"pred": 0.4347422420978546, "label": 6.0}, {"pred": -2.240575075149536, "label": 2.0}, {"pred": -0.5948125123977661, "label": 2.0}, {"pred": 6.614602565765381, "label": 5.0}, {"pred": 4.726541042327881, "label": 5.0}, {"pred": -3.396735191345215, "label": 0.0}, {"pred": -0.8261128664016724, "label": 5.0}, {"pred": -2.3094027042388916, "label": 2.0}, {"pred": 0.37960976362228394, "label": 2.0}, {"pred": -3.0307390689849854, "label": 2.0}, {"pred": -3.9579150676727295, "label": 5.0}, {"pred": -2.420196294784546, "label": 2.0}, {"pred": 6.9699907302856445, "label": 6.0}, {"pred": -1.044237494468689, "label": 6.0}, {"pred": -1.1202991008758545, "label": 6.0}, {"pred": 6.221286773681641, "label": 6.0}, {"pred": 1.2837556600570679, "label": 5.0}, {"pred": -1.6445438861846924, "label": 5.0}, {"pred": -2.026249647140503, "label": 5.0}, {"pred": -3.088744640350342, "label": 5.0}, {"pred": -3.18585205078125, "label": 5.0}, {"pred": 3.9137914180755615, "label": 2.0}, {"pred": 5.3143630027771, "label": 5.0}, {"pred": -3.272541046142578, "label": 8.0}, {"pred": 1.741031289100647, "label": 8.0}, {"pred": 1.3089599609375, "label": 0.0}, {"pred": -1.2750519514083862, "label": 3.0}, {"pred": -0.41170594096183777, "label": 2.0}, {"pred": -2.5046517848968506, "label": 2.0}, {"pred": -0.3387974500656128, "label": 2.0}, {"pred": 5.276491641998291, "label": 8.0}, {"pred": -1.4969731569290161, "label": 8.0}, {"pred": -1.0964272022247314, "label": 0.0}, {"pred": 5.757904529571533, "label": 2.0}, {"pred": 0.23799118399620056, "label": 0.0}, {"pred": -3.1069884300231934, "label": 7.0}, {"pred": -1.6246393918991089, "label": 0.0}, {"pred": -3.437112331390381, "label": 0.0}, {"pred": -0.5458642244338989, "label": 0.0}, {"pred": 3.5296177864074707, "label": 8.0}, {"pred": 0.6833158135414124, "label": 8.0}, {"pred": -1.6968615055084229, "label": 0.0}, {"pred": 6.245951175689697, "label": 0.0}, {"pred": 2.876694440841675, "label": 2.0}, {"pred": -3.0809290409088135, "label": 2.0}, {"pred": -3.641925096511841, "label": 0.0}]
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 714922721
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:028c21ee2bcbc4765d7f542294c6a65021701767f2244e0a78e657a7356d484a
|
| 3 |
size 714922721
|
train_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 5.0,
|
| 3 |
-
"train_loss": 0.
|
| 4 |
-
"train_runtime":
|
| 5 |
"train_samples": 702,
|
| 6 |
-
"train_samples_per_second": 4.
|
| 7 |
-
"train_steps_per_second": 4.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 5.0,
|
| 3 |
+
"train_loss": 0.6706694952103487,
|
| 4 |
+
"train_runtime": 824.1732,
|
| 5 |
"train_samples": 702,
|
| 6 |
+
"train_samples_per_second": 4.259,
|
| 7 |
+
"train_steps_per_second": 4.259
|
| 8 |
}
|
trainer_state.json
CHANGED
|
@@ -9,369 +9,369 @@
|
|
| 9 |
"log_history": [
|
| 10 |
{
|
| 11 |
"epoch": 0.14,
|
| 12 |
-
"eval_f1": 0.
|
| 13 |
-
"eval_loss": 1.
|
| 14 |
-
"eval_runtime": 3.
|
| 15 |
-
"eval_samples_per_second": 29.
|
| 16 |
-
"eval_steps_per_second": 29.
|
| 17 |
"step": 100
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.28,
|
| 21 |
-
"eval_f1": 0.
|
| 22 |
-
"eval_loss": 1.
|
| 23 |
-
"eval_runtime": 3.
|
| 24 |
-
"eval_samples_per_second": 29.
|
| 25 |
-
"eval_steps_per_second": 29.
|
| 26 |
"step": 200
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"epoch": 0.43,
|
| 30 |
-
"eval_f1": 0.
|
| 31 |
-
"eval_loss": 1.
|
| 32 |
-
"eval_runtime": 3.
|
| 33 |
-
"eval_samples_per_second": 29.
|
| 34 |
-
"eval_steps_per_second": 29.
|
| 35 |
"step": 300
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"epoch": 0.57,
|
| 39 |
-
"eval_f1": 0.
|
| 40 |
-
"eval_loss": 1.
|
| 41 |
-
"eval_runtime": 3.
|
| 42 |
-
"eval_samples_per_second": 29.
|
| 43 |
-
"eval_steps_per_second": 29.
|
| 44 |
"step": 400
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.71,
|
| 48 |
"learning_rate": 2.572649572649573e-05,
|
| 49 |
-
"loss": 1.
|
| 50 |
"step": 500
|
| 51 |
},
|
| 52 |
{
|
| 53 |
"epoch": 0.71,
|
| 54 |
-
"eval_f1": 0.
|
| 55 |
-
"eval_loss": 1.
|
| 56 |
-
"eval_runtime": 3.
|
| 57 |
-
"eval_samples_per_second": 29.
|
| 58 |
-
"eval_steps_per_second": 29.
|
| 59 |
"step": 500
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"epoch": 0.85,
|
| 63 |
-
"eval_f1": 0.
|
| 64 |
-
"eval_loss": 1.
|
| 65 |
-
"eval_runtime": 3.
|
| 66 |
-
"eval_samples_per_second": 29.
|
| 67 |
-
"eval_steps_per_second": 29.
|
| 68 |
"step": 600
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"epoch": 1.0,
|
| 72 |
-
"eval_f1": 0.
|
| 73 |
-
"eval_loss": 1.
|
| 74 |
-
"eval_runtime": 3.
|
| 75 |
-
"eval_samples_per_second": 29.
|
| 76 |
-
"eval_steps_per_second": 29.
|
| 77 |
"step": 700
|
| 78 |
},
|
| 79 |
{
|
| 80 |
"epoch": 1.14,
|
| 81 |
-
"eval_f1": 0.
|
| 82 |
-
"eval_loss": 1.
|
| 83 |
-
"eval_runtime": 3.
|
| 84 |
-
"eval_samples_per_second":
|
| 85 |
-
"eval_steps_per_second":
|
| 86 |
"step": 800
|
| 87 |
},
|
| 88 |
{
|
| 89 |
"epoch": 1.28,
|
| 90 |
-
"eval_f1": 0.
|
| 91 |
-
"eval_loss": 1.
|
| 92 |
-
"eval_runtime": 3.
|
| 93 |
-
"eval_samples_per_second": 29.
|
| 94 |
-
"eval_steps_per_second": 29.
|
| 95 |
"step": 900
|
| 96 |
},
|
| 97 |
{
|
| 98 |
"epoch": 1.42,
|
| 99 |
"learning_rate": 2.1452991452991456e-05,
|
| 100 |
-
"loss": 0.
|
| 101 |
"step": 1000
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"epoch": 1.42,
|
| 105 |
-
"eval_f1": 0.
|
| 106 |
-
"eval_loss": 1.
|
| 107 |
-
"eval_runtime": 3.
|
| 108 |
-
"eval_samples_per_second": 29.
|
| 109 |
-
"eval_steps_per_second": 29.
|
| 110 |
"step": 1000
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"epoch": 1.57,
|
| 114 |
-
"eval_f1": 0.
|
| 115 |
-
"eval_loss": 1.
|
| 116 |
-
"eval_runtime": 3.
|
| 117 |
-
"eval_samples_per_second": 29.
|
| 118 |
-
"eval_steps_per_second": 29.
|
| 119 |
"step": 1100
|
| 120 |
},
|
| 121 |
{
|
| 122 |
"epoch": 1.71,
|
| 123 |
-
"eval_f1": 0.
|
| 124 |
-
"eval_loss": 1.
|
| 125 |
-
"eval_runtime": 3.
|
| 126 |
-
"eval_samples_per_second": 29.
|
| 127 |
-
"eval_steps_per_second": 29.
|
| 128 |
"step": 1200
|
| 129 |
},
|
| 130 |
{
|
| 131 |
"epoch": 1.85,
|
| 132 |
-
"eval_f1": 0.
|
| 133 |
-
"eval_loss": 1.
|
| 134 |
-
"eval_runtime": 3.
|
| 135 |
-
"eval_samples_per_second":
|
| 136 |
-
"eval_steps_per_second":
|
| 137 |
"step": 1300
|
| 138 |
},
|
| 139 |
{
|
| 140 |
"epoch": 1.99,
|
| 141 |
-
"eval_f1": 0.
|
| 142 |
-
"eval_loss": 1.
|
| 143 |
-
"eval_runtime": 3.
|
| 144 |
-
"eval_samples_per_second": 29.
|
| 145 |
-
"eval_steps_per_second": 29.
|
| 146 |
"step": 1400
|
| 147 |
},
|
| 148 |
{
|
| 149 |
"epoch": 2.14,
|
| 150 |
"learning_rate": 1.7179487179487178e-05,
|
| 151 |
-
"loss": 0.
|
| 152 |
"step": 1500
|
| 153 |
},
|
| 154 |
{
|
| 155 |
"epoch": 2.14,
|
| 156 |
-
"eval_f1": 0.
|
| 157 |
-
"eval_loss": 1.
|
| 158 |
-
"eval_runtime": 3.
|
| 159 |
-
"eval_samples_per_second": 29.
|
| 160 |
-
"eval_steps_per_second": 29.
|
| 161 |
"step": 1500
|
| 162 |
},
|
| 163 |
{
|
| 164 |
"epoch": 2.28,
|
| 165 |
-
"eval_f1": 0.
|
| 166 |
-
"eval_loss": 1.
|
| 167 |
-
"eval_runtime": 3.
|
| 168 |
-
"eval_samples_per_second":
|
| 169 |
-
"eval_steps_per_second":
|
| 170 |
"step": 1600
|
| 171 |
},
|
| 172 |
{
|
| 173 |
"epoch": 2.42,
|
| 174 |
-
"eval_f1": 0.
|
| 175 |
-
"eval_loss": 1.
|
| 176 |
-
"eval_runtime": 3.
|
| 177 |
-
"eval_samples_per_second":
|
| 178 |
-
"eval_steps_per_second":
|
| 179 |
"step": 1700
|
| 180 |
},
|
| 181 |
{
|
| 182 |
"epoch": 2.56,
|
| 183 |
-
"eval_f1": 0.
|
| 184 |
-
"eval_loss": 1.
|
| 185 |
-
"eval_runtime": 3.
|
| 186 |
-
"eval_samples_per_second":
|
| 187 |
-
"eval_steps_per_second":
|
| 188 |
"step": 1800
|
| 189 |
},
|
| 190 |
{
|
| 191 |
"epoch": 2.71,
|
| 192 |
-
"eval_f1": 0.
|
| 193 |
-
"eval_loss": 1.
|
| 194 |
-
"eval_runtime": 3.
|
| 195 |
-
"eval_samples_per_second": 29.
|
| 196 |
-
"eval_steps_per_second": 29.
|
| 197 |
"step": 1900
|
| 198 |
},
|
| 199 |
{
|
| 200 |
"epoch": 2.85,
|
| 201 |
"learning_rate": 1.2905982905982905e-05,
|
| 202 |
-
"loss": 0.
|
| 203 |
"step": 2000
|
| 204 |
},
|
| 205 |
{
|
| 206 |
"epoch": 2.85,
|
| 207 |
-
"eval_f1": 0.
|
| 208 |
-
"eval_loss": 1.
|
| 209 |
-
"eval_runtime": 3.
|
| 210 |
-
"eval_samples_per_second": 29.
|
| 211 |
-
"eval_steps_per_second": 29.
|
| 212 |
"step": 2000
|
| 213 |
},
|
| 214 |
{
|
| 215 |
"epoch": 2.99,
|
| 216 |
-
"eval_f1": 0.
|
| 217 |
-
"eval_loss": 1.
|
| 218 |
-
"eval_runtime": 3.
|
| 219 |
-
"eval_samples_per_second": 29.
|
| 220 |
-
"eval_steps_per_second": 29.
|
| 221 |
"step": 2100
|
| 222 |
},
|
| 223 |
{
|
| 224 |
"epoch": 3.13,
|
| 225 |
-
"eval_f1": 0.
|
| 226 |
-
"eval_loss": 1.
|
| 227 |
-
"eval_runtime": 3.
|
| 228 |
-
"eval_samples_per_second": 29.
|
| 229 |
-
"eval_steps_per_second": 29.
|
| 230 |
"step": 2200
|
| 231 |
},
|
| 232 |
{
|
| 233 |
"epoch": 3.28,
|
| 234 |
-
"eval_f1": 0.
|
| 235 |
-
"eval_loss": 1.
|
| 236 |
-
"eval_runtime": 3.
|
| 237 |
-
"eval_samples_per_second": 29.
|
| 238 |
-
"eval_steps_per_second": 29.
|
| 239 |
"step": 2300
|
| 240 |
},
|
| 241 |
{
|
| 242 |
"epoch": 3.42,
|
| 243 |
-
"eval_f1": 0.
|
| 244 |
-
"eval_loss": 1.
|
| 245 |
-
"eval_runtime": 3.
|
| 246 |
-
"eval_samples_per_second": 29.
|
| 247 |
-
"eval_steps_per_second": 29.
|
| 248 |
"step": 2400
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 3.56,
|
| 252 |
"learning_rate": 8.632478632478633e-06,
|
| 253 |
-
"loss": 0.
|
| 254 |
"step": 2500
|
| 255 |
},
|
| 256 |
{
|
| 257 |
"epoch": 3.56,
|
| 258 |
-
"eval_f1": 0.
|
| 259 |
-
"eval_loss": 1.
|
| 260 |
-
"eval_runtime": 3.
|
| 261 |
-
"eval_samples_per_second": 29.
|
| 262 |
-
"eval_steps_per_second": 29.
|
| 263 |
"step": 2500
|
| 264 |
},
|
| 265 |
{
|
| 266 |
"epoch": 3.7,
|
| 267 |
-
"eval_f1": 0.
|
| 268 |
-
"eval_loss": 1.
|
| 269 |
-
"eval_runtime": 3.
|
| 270 |
-
"eval_samples_per_second": 29.
|
| 271 |
-
"eval_steps_per_second": 29.
|
| 272 |
"step": 2600
|
| 273 |
},
|
| 274 |
{
|
| 275 |
"epoch": 3.85,
|
| 276 |
-
"eval_f1": 0.
|
| 277 |
-
"eval_loss": 1.
|
| 278 |
-
"eval_runtime": 3.
|
| 279 |
-
"eval_samples_per_second": 29.
|
| 280 |
-
"eval_steps_per_second": 29.
|
| 281 |
"step": 2700
|
| 282 |
},
|
| 283 |
{
|
| 284 |
"epoch": 3.99,
|
| 285 |
-
"eval_f1": 0.
|
| 286 |
-
"eval_loss": 1.
|
| 287 |
-
"eval_runtime": 3.
|
| 288 |
-
"eval_samples_per_second": 29.
|
| 289 |
-
"eval_steps_per_second": 29.
|
| 290 |
"step": 2800
|
| 291 |
},
|
| 292 |
{
|
| 293 |
"epoch": 4.13,
|
| 294 |
-
"eval_f1": 0.
|
| 295 |
-
"eval_loss": 1.
|
| 296 |
-
"eval_runtime": 3.
|
| 297 |
-
"eval_samples_per_second": 29.
|
| 298 |
-
"eval_steps_per_second": 29.
|
| 299 |
"step": 2900
|
| 300 |
},
|
| 301 |
{
|
| 302 |
"epoch": 4.27,
|
| 303 |
"learning_rate": 4.358974358974359e-06,
|
| 304 |
-
"loss": 0.
|
| 305 |
"step": 3000
|
| 306 |
},
|
| 307 |
{
|
| 308 |
"epoch": 4.27,
|
| 309 |
-
"eval_f1": 0.
|
| 310 |
-
"eval_loss": 1.
|
| 311 |
-
"eval_runtime": 3.
|
| 312 |
-
"eval_samples_per_second": 29.
|
| 313 |
-
"eval_steps_per_second": 29.
|
| 314 |
"step": 3000
|
| 315 |
},
|
| 316 |
{
|
| 317 |
"epoch": 4.42,
|
| 318 |
-
"eval_f1": 0.
|
| 319 |
-
"eval_loss": 1.
|
| 320 |
-
"eval_runtime": 3.
|
| 321 |
-
"eval_samples_per_second": 29.
|
| 322 |
-
"eval_steps_per_second": 29.
|
| 323 |
"step": 3100
|
| 324 |
},
|
| 325 |
{
|
| 326 |
"epoch": 4.56,
|
| 327 |
-
"eval_f1": 0.
|
| 328 |
-
"eval_loss": 1.
|
| 329 |
-
"eval_runtime": 3.
|
| 330 |
-
"eval_samples_per_second":
|
| 331 |
-
"eval_steps_per_second":
|
| 332 |
"step": 3200
|
| 333 |
},
|
| 334 |
{
|
| 335 |
"epoch": 4.7,
|
| 336 |
-
"eval_f1": 0.
|
| 337 |
-
"eval_loss": 1.
|
| 338 |
-
"eval_runtime": 3.
|
| 339 |
-
"eval_samples_per_second": 29.
|
| 340 |
-
"eval_steps_per_second": 29.
|
| 341 |
"step": 3300
|
| 342 |
},
|
| 343 |
{
|
| 344 |
"epoch": 4.84,
|
| 345 |
-
"eval_f1": 0.
|
| 346 |
-
"eval_loss": 1.
|
| 347 |
-
"eval_runtime": 3.
|
| 348 |
-
"eval_samples_per_second": 29.
|
| 349 |
-
"eval_steps_per_second": 29.
|
| 350 |
"step": 3400
|
| 351 |
},
|
| 352 |
{
|
| 353 |
"epoch": 4.99,
|
| 354 |
"learning_rate": 8.547008547008547e-08,
|
| 355 |
-
"loss": 0.
|
| 356 |
"step": 3500
|
| 357 |
},
|
| 358 |
{
|
| 359 |
"epoch": 4.99,
|
| 360 |
-
"eval_f1": 0.
|
| 361 |
-
"eval_loss": 1.
|
| 362 |
-
"eval_runtime": 3.
|
| 363 |
-
"eval_samples_per_second": 29.
|
| 364 |
-
"eval_steps_per_second": 29.
|
| 365 |
"step": 3500
|
| 366 |
},
|
| 367 |
{
|
| 368 |
"epoch": 5.0,
|
| 369 |
"step": 3510,
|
| 370 |
"total_flos": 2890172619430200.0,
|
| 371 |
-
"train_loss": 0.
|
| 372 |
-
"train_runtime":
|
| 373 |
-
"train_samples_per_second": 4.
|
| 374 |
-
"train_steps_per_second": 4.
|
| 375 |
}
|
| 376 |
],
|
| 377 |
"max_steps": 3510,
|
|
|
|
| 9 |
"log_history": [
|
| 10 |
{
|
| 11 |
"epoch": 0.14,
|
| 12 |
+
"eval_f1": 0.37622621795062866,
|
| 13 |
+
"eval_loss": 1.36316978931427,
|
| 14 |
+
"eval_runtime": 3.2856,
|
| 15 |
+
"eval_samples_per_second": 29.827,
|
| 16 |
+
"eval_steps_per_second": 29.827,
|
| 17 |
"step": 100
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.28,
|
| 21 |
+
"eval_f1": 0.41617398891170265,
|
| 22 |
+
"eval_loss": 1.227824330329895,
|
| 23 |
+
"eval_runtime": 3.2714,
|
| 24 |
+
"eval_samples_per_second": 29.957,
|
| 25 |
+
"eval_steps_per_second": 29.957,
|
| 26 |
"step": 200
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"epoch": 0.43,
|
| 30 |
+
"eval_f1": 0.41594055599199414,
|
| 31 |
+
"eval_loss": 1.1802130937576294,
|
| 32 |
+
"eval_runtime": 3.2851,
|
| 33 |
+
"eval_samples_per_second": 29.832,
|
| 34 |
+
"eval_steps_per_second": 29.832,
|
| 35 |
"step": 300
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"epoch": 0.57,
|
| 39 |
+
"eval_f1": 0.4879226887989845,
|
| 40 |
+
"eval_loss": 1.3237018585205078,
|
| 41 |
+
"eval_runtime": 3.2714,
|
| 42 |
+
"eval_samples_per_second": 29.957,
|
| 43 |
+
"eval_steps_per_second": 29.957,
|
| 44 |
"step": 400
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.71,
|
| 48 |
"learning_rate": 2.572649572649573e-05,
|
| 49 |
+
"loss": 1.2,
|
| 50 |
"step": 500
|
| 51 |
},
|
| 52 |
{
|
| 53 |
"epoch": 0.71,
|
| 54 |
+
"eval_f1": 0.46450380175056494,
|
| 55 |
+
"eval_loss": 1.2971174716949463,
|
| 56 |
+
"eval_runtime": 3.2843,
|
| 57 |
+
"eval_samples_per_second": 29.839,
|
| 58 |
+
"eval_steps_per_second": 29.839,
|
| 59 |
"step": 500
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"epoch": 0.85,
|
| 63 |
+
"eval_f1": 0.5019868520647613,
|
| 64 |
+
"eval_loss": 1.2549620866775513,
|
| 65 |
+
"eval_runtime": 3.267,
|
| 66 |
+
"eval_samples_per_second": 29.997,
|
| 67 |
+
"eval_steps_per_second": 29.997,
|
| 68 |
"step": 600
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"epoch": 1.0,
|
| 72 |
+
"eval_f1": 0.48057967334012397,
|
| 73 |
+
"eval_loss": 1.1853649616241455,
|
| 74 |
+
"eval_runtime": 3.277,
|
| 75 |
+
"eval_samples_per_second": 29.905,
|
| 76 |
+
"eval_steps_per_second": 29.905,
|
| 77 |
"step": 700
|
| 78 |
},
|
| 79 |
{
|
| 80 |
"epoch": 1.14,
|
| 81 |
+
"eval_f1": 0.5011814210846155,
|
| 82 |
+
"eval_loss": 1.1788480281829834,
|
| 83 |
+
"eval_runtime": 3.2639,
|
| 84 |
+
"eval_samples_per_second": 30.025,
|
| 85 |
+
"eval_steps_per_second": 30.025,
|
| 86 |
"step": 800
|
| 87 |
},
|
| 88 |
{
|
| 89 |
"epoch": 1.28,
|
| 90 |
+
"eval_f1": 0.4964300899620197,
|
| 91 |
+
"eval_loss": 1.093542218208313,
|
| 92 |
+
"eval_runtime": 3.2724,
|
| 93 |
+
"eval_samples_per_second": 29.947,
|
| 94 |
+
"eval_steps_per_second": 29.947,
|
| 95 |
"step": 900
|
| 96 |
},
|
| 97 |
{
|
| 98 |
"epoch": 1.42,
|
| 99 |
"learning_rate": 2.1452991452991456e-05,
|
| 100 |
+
"loss": 0.9189,
|
| 101 |
"step": 1000
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"epoch": 1.42,
|
| 105 |
+
"eval_f1": 0.4986272191320895,
|
| 106 |
+
"eval_loss": 1.2862237691879272,
|
| 107 |
+
"eval_runtime": 3.302,
|
| 108 |
+
"eval_samples_per_second": 29.679,
|
| 109 |
+
"eval_steps_per_second": 29.679,
|
| 110 |
"step": 1000
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"epoch": 1.57,
|
| 114 |
+
"eval_f1": 0.49297809308258944,
|
| 115 |
+
"eval_loss": 1.2222929000854492,
|
| 116 |
+
"eval_runtime": 3.3171,
|
| 117 |
+
"eval_samples_per_second": 29.544,
|
| 118 |
+
"eval_steps_per_second": 29.544,
|
| 119 |
"step": 1100
|
| 120 |
},
|
| 121 |
{
|
| 122 |
"epoch": 1.71,
|
| 123 |
+
"eval_f1": 0.4953797333525823,
|
| 124 |
+
"eval_loss": 1.1196690797805786,
|
| 125 |
+
"eval_runtime": 3.2943,
|
| 126 |
+
"eval_samples_per_second": 29.749,
|
| 127 |
+
"eval_steps_per_second": 29.749,
|
| 128 |
"step": 1200
|
| 129 |
},
|
| 130 |
{
|
| 131 |
"epoch": 1.85,
|
| 132 |
+
"eval_f1": 0.5153008157478032,
|
| 133 |
+
"eval_loss": 1.1256704330444336,
|
| 134 |
+
"eval_runtime": 3.2631,
|
| 135 |
+
"eval_samples_per_second": 30.033,
|
| 136 |
+
"eval_steps_per_second": 30.033,
|
| 137 |
"step": 1300
|
| 138 |
},
|
| 139 |
{
|
| 140 |
"epoch": 1.99,
|
| 141 |
+
"eval_f1": 0.5263780363862973,
|
| 142 |
+
"eval_loss": 1.1729286909103394,
|
| 143 |
+
"eval_runtime": 3.2904,
|
| 144 |
+
"eval_samples_per_second": 29.783,
|
| 145 |
+
"eval_steps_per_second": 29.783,
|
| 146 |
"step": 1400
|
| 147 |
},
|
| 148 |
{
|
| 149 |
"epoch": 2.14,
|
| 150 |
"learning_rate": 1.7179487179487178e-05,
|
| 151 |
+
"loss": 0.8143,
|
| 152 |
"step": 1500
|
| 153 |
},
|
| 154 |
{
|
| 155 |
"epoch": 2.14,
|
| 156 |
+
"eval_f1": 0.5165321012151871,
|
| 157 |
+
"eval_loss": 1.272233486175537,
|
| 158 |
+
"eval_runtime": 3.3087,
|
| 159 |
+
"eval_samples_per_second": 29.619,
|
| 160 |
+
"eval_steps_per_second": 29.619,
|
| 161 |
"step": 1500
|
| 162 |
},
|
| 163 |
{
|
| 164 |
"epoch": 2.28,
|
| 165 |
+
"eval_f1": 0.539472065505205,
|
| 166 |
+
"eval_loss": 1.3217926025390625,
|
| 167 |
+
"eval_runtime": 3.2634,
|
| 168 |
+
"eval_samples_per_second": 30.03,
|
| 169 |
+
"eval_steps_per_second": 30.03,
|
| 170 |
"step": 1600
|
| 171 |
},
|
| 172 |
{
|
| 173 |
"epoch": 2.42,
|
| 174 |
+
"eval_f1": 0.5170136038987323,
|
| 175 |
+
"eval_loss": 1.338261604309082,
|
| 176 |
+
"eval_runtime": 3.2635,
|
| 177 |
+
"eval_samples_per_second": 30.029,
|
| 178 |
+
"eval_steps_per_second": 30.029,
|
| 179 |
"step": 1700
|
| 180 |
},
|
| 181 |
{
|
| 182 |
"epoch": 2.56,
|
| 183 |
+
"eval_f1": 0.5138801729725696,
|
| 184 |
+
"eval_loss": 1.250339388847351,
|
| 185 |
+
"eval_runtime": 3.2656,
|
| 186 |
+
"eval_samples_per_second": 30.009,
|
| 187 |
+
"eval_steps_per_second": 30.009,
|
| 188 |
"step": 1800
|
| 189 |
},
|
| 190 |
{
|
| 191 |
"epoch": 2.71,
|
| 192 |
+
"eval_f1": 0.523963853035474,
|
| 193 |
+
"eval_loss": 1.362999439239502,
|
| 194 |
+
"eval_runtime": 3.3211,
|
| 195 |
+
"eval_samples_per_second": 29.508,
|
| 196 |
+
"eval_steps_per_second": 29.508,
|
| 197 |
"step": 1900
|
| 198 |
},
|
| 199 |
{
|
| 200 |
"epoch": 2.85,
|
| 201 |
"learning_rate": 1.2905982905982905e-05,
|
| 202 |
+
"loss": 0.6175,
|
| 203 |
"step": 2000
|
| 204 |
},
|
| 205 |
{
|
| 206 |
"epoch": 2.85,
|
| 207 |
+
"eval_f1": 0.5305458058252502,
|
| 208 |
+
"eval_loss": 1.402750015258789,
|
| 209 |
+
"eval_runtime": 3.2768,
|
| 210 |
+
"eval_samples_per_second": 29.907,
|
| 211 |
+
"eval_steps_per_second": 29.907,
|
| 212 |
"step": 2000
|
| 213 |
},
|
| 214 |
{
|
| 215 |
"epoch": 2.99,
|
| 216 |
+
"eval_f1": 0.5408209021870833,
|
| 217 |
+
"eval_loss": 1.4016790390014648,
|
| 218 |
+
"eval_runtime": 3.3122,
|
| 219 |
+
"eval_samples_per_second": 29.588,
|
| 220 |
+
"eval_steps_per_second": 29.588,
|
| 221 |
"step": 2100
|
| 222 |
},
|
| 223 |
{
|
| 224 |
"epoch": 3.13,
|
| 225 |
+
"eval_f1": 0.541281162975512,
|
| 226 |
+
"eval_loss": 1.5929616689682007,
|
| 227 |
+
"eval_runtime": 3.294,
|
| 228 |
+
"eval_samples_per_second": 29.751,
|
| 229 |
+
"eval_steps_per_second": 29.751,
|
| 230 |
"step": 2200
|
| 231 |
},
|
| 232 |
{
|
| 233 |
"epoch": 3.28,
|
| 234 |
+
"eval_f1": 0.5564758214624422,
|
| 235 |
+
"eval_loss": 1.5372625589370728,
|
| 236 |
+
"eval_runtime": 3.2882,
|
| 237 |
+
"eval_samples_per_second": 29.803,
|
| 238 |
+
"eval_steps_per_second": 29.803,
|
| 239 |
"step": 2300
|
| 240 |
},
|
| 241 |
{
|
| 242 |
"epoch": 3.42,
|
| 243 |
+
"eval_f1": 0.5722151004353093,
|
| 244 |
+
"eval_loss": 1.5012538433074951,
|
| 245 |
+
"eval_runtime": 3.3067,
|
| 246 |
+
"eval_samples_per_second": 29.637,
|
| 247 |
+
"eval_steps_per_second": 29.637,
|
| 248 |
"step": 2400
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 3.56,
|
| 252 |
"learning_rate": 8.632478632478633e-06,
|
| 253 |
+
"loss": 0.4726,
|
| 254 |
"step": 2500
|
| 255 |
},
|
| 256 |
{
|
| 257 |
"epoch": 3.56,
|
| 258 |
+
"eval_f1": 0.5226487560978434,
|
| 259 |
+
"eval_loss": 1.570418119430542,
|
| 260 |
+
"eval_runtime": 3.3114,
|
| 261 |
+
"eval_samples_per_second": 29.595,
|
| 262 |
+
"eval_steps_per_second": 29.595,
|
| 263 |
"step": 2500
|
| 264 |
},
|
| 265 |
{
|
| 266 |
"epoch": 3.7,
|
| 267 |
+
"eval_f1": 0.5483719296880323,
|
| 268 |
+
"eval_loss": 1.5890936851501465,
|
| 269 |
+
"eval_runtime": 3.2745,
|
| 270 |
+
"eval_samples_per_second": 29.928,
|
| 271 |
+
"eval_steps_per_second": 29.928,
|
| 272 |
"step": 2600
|
| 273 |
},
|
| 274 |
{
|
| 275 |
"epoch": 3.85,
|
| 276 |
+
"eval_f1": 0.5630120856995185,
|
| 277 |
+
"eval_loss": 1.5236029624938965,
|
| 278 |
+
"eval_runtime": 3.2951,
|
| 279 |
+
"eval_samples_per_second": 29.741,
|
| 280 |
+
"eval_steps_per_second": 29.741,
|
| 281 |
"step": 2700
|
| 282 |
},
|
| 283 |
{
|
| 284 |
"epoch": 3.99,
|
| 285 |
+
"eval_f1": 0.5422100713682105,
|
| 286 |
+
"eval_loss": 1.52333664894104,
|
| 287 |
+
"eval_runtime": 3.3261,
|
| 288 |
+
"eval_samples_per_second": 29.464,
|
| 289 |
+
"eval_steps_per_second": 29.464,
|
| 290 |
"step": 2800
|
| 291 |
},
|
| 292 |
{
|
| 293 |
"epoch": 4.13,
|
| 294 |
+
"eval_f1": 0.5469719933620487,
|
| 295 |
+
"eval_loss": 1.6104604005813599,
|
| 296 |
+
"eval_runtime": 3.2888,
|
| 297 |
+
"eval_samples_per_second": 29.798,
|
| 298 |
+
"eval_steps_per_second": 29.798,
|
| 299 |
"step": 2900
|
| 300 |
},
|
| 301 |
{
|
| 302 |
"epoch": 4.27,
|
| 303 |
"learning_rate": 4.358974358974359e-06,
|
| 304 |
+
"loss": 0.3745,
|
| 305 |
"step": 3000
|
| 306 |
},
|
| 307 |
{
|
| 308 |
"epoch": 4.27,
|
| 309 |
+
"eval_f1": 0.5525357490677262,
|
| 310 |
+
"eval_loss": 1.7136110067367554,
|
| 311 |
+
"eval_runtime": 3.3248,
|
| 312 |
+
"eval_samples_per_second": 29.476,
|
| 313 |
+
"eval_steps_per_second": 29.476,
|
| 314 |
"step": 3000
|
| 315 |
},
|
| 316 |
{
|
| 317 |
"epoch": 4.42,
|
| 318 |
+
"eval_f1": 0.5539436259955471,
|
| 319 |
+
"eval_loss": 1.6561492681503296,
|
| 320 |
+
"eval_runtime": 3.2857,
|
| 321 |
+
"eval_samples_per_second": 29.826,
|
| 322 |
+
"eval_steps_per_second": 29.826,
|
| 323 |
"step": 3100
|
| 324 |
},
|
| 325 |
{
|
| 326 |
"epoch": 4.56,
|
| 327 |
+
"eval_f1": 0.5504413375623162,
|
| 328 |
+
"eval_loss": 1.7664132118225098,
|
| 329 |
+
"eval_runtime": 3.2517,
|
| 330 |
+
"eval_samples_per_second": 30.138,
|
| 331 |
+
"eval_steps_per_second": 30.138,
|
| 332 |
"step": 3200
|
| 333 |
},
|
| 334 |
{
|
| 335 |
"epoch": 4.7,
|
| 336 |
+
"eval_f1": 0.5494419672200014,
|
| 337 |
+
"eval_loss": 1.750455379486084,
|
| 338 |
+
"eval_runtime": 3.27,
|
| 339 |
+
"eval_samples_per_second": 29.969,
|
| 340 |
+
"eval_steps_per_second": 29.969,
|
| 341 |
"step": 3300
|
| 342 |
},
|
| 343 |
{
|
| 344 |
"epoch": 4.84,
|
| 345 |
+
"eval_f1": 0.5516497223039627,
|
| 346 |
+
"eval_loss": 1.7312653064727783,
|
| 347 |
+
"eval_runtime": 3.3127,
|
| 348 |
+
"eval_samples_per_second": 29.583,
|
| 349 |
+
"eval_steps_per_second": 29.583,
|
| 350 |
"step": 3400
|
| 351 |
},
|
| 352 |
{
|
| 353 |
"epoch": 4.99,
|
| 354 |
"learning_rate": 8.547008547008547e-08,
|
| 355 |
+
"loss": 0.307,
|
| 356 |
"step": 3500
|
| 357 |
},
|
| 358 |
{
|
| 359 |
"epoch": 4.99,
|
| 360 |
+
"eval_f1": 0.5515045914952008,
|
| 361 |
+
"eval_loss": 1.7193822860717773,
|
| 362 |
+
"eval_runtime": 3.2769,
|
| 363 |
+
"eval_samples_per_second": 29.907,
|
| 364 |
+
"eval_steps_per_second": 29.907,
|
| 365 |
"step": 3500
|
| 366 |
},
|
| 367 |
{
|
| 368 |
"epoch": 5.0,
|
| 369 |
"step": 3510,
|
| 370 |
"total_flos": 2890172619430200.0,
|
| 371 |
+
"train_loss": 0.6706694952103487,
|
| 372 |
+
"train_runtime": 824.1732,
|
| 373 |
+
"train_samples_per_second": 4.259,
|
| 374 |
+
"train_steps_per_second": 4.259
|
| 375 |
}
|
| 376 |
],
|
| 377 |
"max_steps": 3510,
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3899
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5347b81bd66fbac180a70e3615c9c445e5992c7677db1c7c6314dc0b49027803
|
| 3 |
size 3899
|