fukugawa
/

transformer-lm-japanese-0.1b

Text Generation

Model card Files and versions

fukugawa committed on May 21, 2024

Commit

fe82d0f

·

verified ·

1 Parent(s): e06470c

Upload FlaxTransformerLMForCausalLM

Files changed (2) hide show

flax_model.msgpack +1 -1
modeling_transformerlm_flax.py +3 -0

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f146d3ff30cefcdecb23e15f175395be65b13de37821c4d78b32feb8415f3666
 size 524522413

 version https://git-lfs.github.com/spec/v1
+oid sha256:f43dc830c806b64d6a77027a61d16bd2fcbe896c799d5dbba0a81b9e7f26fc8b
 size 524522413

modeling_transformerlm_flax.py CHANGED Viewed

@@ -404,6 +404,9 @@ class FlaxTransformerLMPreTrainedModel(FlaxPreTrainedModel):
       last_logits, last_cache = last
       lm_logits = jnp.reshape(all_logits, (1, seq_length, vcab_size))
       if not return_dict:
         outputs = (lm_logits,) + (last_cache,)
       else:

       last_logits, last_cache = last
       lm_logits = jnp.reshape(all_logits, (1, seq_length, vcab_size))
+      if input_ids.shape[1] > 1:
+        lm_logits = lm_logits[:, 1:, :] # Ignore leading zeros in prompts
       if not return_dict:
         outputs = (lm_logits,) + (last_cache,)
       else: