Downloading the model
Using the following script:
```
from unsloth import FastLanguageModel
import torch

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "huihui-ai/DeepSeek-R1-0528-Qwen3-8B-abliterated",
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = False,
    token = "MY_HF_TOKEN",
)
```
I run into this error:
```
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
Unrecognized keys in rope_scaling for 'rope_type'='yarn': {'attn_factor'}
==((====))== Unsloth 2025.8.4: Fast Qwen3 patching. Transformers: 4.55.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
"-____-" Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unrecognized keys in rope_scaling for 'rope_type'='yarn': {'attn_factor'}
Unrecognized keys in rope_scaling for 'rope_type'='yarn': {'attn_factor'}
model.safetensors.index.json: 100% 33.3k/33.3k [00:00<00:00, 2.53MB/s]
model-00001-of-00004.safetensors: 100% 4.90G/4.90G [00:38<00:00, 197MB/s]
model-00002-of-00004.safetensors: 100% 4.92G/4.92G [00:47<00:00, 395MB/s]
model-00003-of-00004.safetensors: 100% 4.98G/4.98G [00:48<00:00, 337MB/s]
model-00004-of-00004.safetensors: 100% 1.58G/1.58G [00:26<00:00, 342MB/s]
Loading checkpoint shards: 100% 4/4 [01:10<00:00, 15.35s/it]
generation_config.json: 100% 127/127 [00:00<00:00, 9.51kB/s]
WARNING:accelerate.big_modeling:Some parameters are on the meta device because they were offloaded to the cpu.
tokenizer_config.json: 100% 3.96k/3.96k [00:00<00:00, 418kB/s]
tokenizer.json: 100% 7.03M/7.03M [00:00<00:00, 19.0MB/s]
special_tokens_map.json: 100% 508/508 [00:00<00:00, 49.3kB/s]
RuntimeError Traceback (most recent call last)
/tmp/ipython-input-4141484177.py in <cell line: 0>()
3
4
----> 5 model, tokenizer = FastLanguageModel.from_pretrained(
6 model_name = "huihui-ai/DeepSeek-R1-0528-Qwen3-8B-abliterated",
7 max_seq_length = 2048,
/usr/local/lib/python3.11/dist-packages/unsloth/models/loader.py in from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, load_in_8bit, full_finetuning, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, use_exact_model_name, fast_inference, gpu_memory_utilization, float8_kv_cache, random_state, max_lora_rank, disable_log_stats, *args, **kwargs)
392 pass
393
--> 394 model, tokenizer = dispatch_model.from_pretrained(
395 model_name = model_name,
396 max_seq_length = max_seq_length,
/usr/local/lib/python3.11/dist-packages/unsloth/models/qwen3.py in from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, token, device_map, rope_scaling, fix_tokenizer, model_patcher, tokenizer_name, trust_remote_code, **kwargs)
418 **kwargs,
419 ):
--> 420 return FastLlamaModel.from_pretrained(
421 model_name = model_name,
422 max_seq_length = max_seq_length,
/usr/local/lib/python3.11/dist-packages/unsloth/models/llama.py in from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, token, device_map, rope_scaling, fix_tokenizer, model_patcher, tokenizer_name, trust_remote_code, revision, fast_inference, gpu_memory_utilization, float8_kv_cache, random_state, max_lora_rank, disable_log_stats, unsloth_vllm_standby, num_labels, **kwargs)
2040 # Counteract saved tokenizers
2041 tokenizer_name = model_name if tokenizer_name is None else tokenizer_name
-> 2042 tokenizer = load_correct_tokenizer(
2043 tokenizer_name = tokenizer_name,
2044 model_max_length = max_position_embeddings,
/usr/local/lib/python3.11/dist-packages/unsloth/tokenizer_utils.py in load_correct_tokenizer(tokenizer_name, model_max_length, padding_side, token, trust_remote_code, cache_dir, fix_tokenizer)
584
585 else:
--> 586 chat_template = fix_chat_template(tokenizer)
587 if old_chat_template is not None and chat_template is None:
588 raise RuntimeError(
/usr/local/lib/python3.11/dist-packages/unsloth/tokenizer_utils.py in fix_chat_template(tokenizer)
684 if "{% if add_generation_prompt %}" not in new_chat_template and
685 "{%- if add_generation_prompt %}" not in new_chat_template:
--> 686 raise RuntimeError(
687 f"Unsloth: The tokenizer {tokenizer.name_or_path}\n"
688 "does not have a {% if add_generation_prompt %} for generation purposes.\n"\
RuntimeError: Unsloth: The tokenizer huihui-ai/DeepSeek-R1-0528-Qwen3-8B-abliterated
does not have a {% if add_generation_prompt %} for generation purposes.
Please file a bug report to the maintainers of huihui-ai/DeepSeek-R1-0528-Qwen3-8B-abliterated - thanks!
```
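From the traceback, the download itself completes fine; the failure happens afterwards, when Unsloth validates the tokenizer's chat template (`fix_chat_template` in `tokenizer_utils.py`) and finds no `{% if add_generation_prompt %}` branch. Below is an untested sketch of the two things I'm considering: checking the chat template directly with plain transformers to confirm the missing branch, and passing `fix_tokenizer = False` — that parameter appears in the `from_pretrained` signature in the traceback, and I'm assuming (not certain) that it bypasses the failing check:

```
from transformers import AutoTokenizer
from unsloth import FastLanguageModel

repo = "huihui-ai/DeepSeek-R1-0528-Qwen3-8B-abliterated"

# 1) Inspect the chat template directly to confirm the missing branch.
tok = AutoTokenizer.from_pretrained(repo, token="MY_HF_TOKEN")
print("{% if add_generation_prompt %}" in (tok.chat_template or ""))

# 2) Retry the load with fix_tokenizer = False. This parameter is in the
#    loader signature shown in the traceback; I'm assuming False skips the
#    chat-template validation that raises the RuntimeError.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = repo,
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = False,
    token = "MY_HF_TOKEN",
    fix_tokenizer = False,
)
```

Is `fix_tokenizer = False` the right knob here, or does the chat template in the repo itself need to be patched?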