# Create the working directory for the model shards and change into it.
# Abort if cd fails so later downloads don't land in the wrong place.
mkdir -p DeepSeek-V2-Chat.Q2_K.gguf
cd DeepSeek-V2-Chat.Q2_K.gguf || exit 1
# Download the five GGUF split files from Hugging Face.
# Stop immediately on a failed download — a missing shard makes the
# whole model unloadable.
for i in {1..5}; do
  wget "https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf/DeepSeek-V2-Chat.Q2_K-0000${i}-of-00005.gguf?download=true" \
    -O "DeepSeek-V2-Chat.Q2_K-0000${i}-of-00005.gguf" || exit 1
done
# Fetch the prebuilt llama.cpp b2961 binaries matching the host OS/arch.
case "$(uname -s)" in
  Linux)
    wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-ubuntu-x64.zip || exit 1
    unzip llama-b2961-bin-ubuntu-x64.zip -d .
    ;;
  Darwin)
    # Apple Silicon vs Intel Macs ship different binaries.
    if [[ $(uname -m) == 'arm64' ]]; then
      wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-macos-arm64.zip || exit 1
      unzip llama-b2961-bin-macos-arm64.zip -d .
    else
      wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-macos-x64.zip || exit 1
      unzip llama-b2961-bin-macos-x64.zip -d .
    fi
    ;;
  *)
    # Fail loudly instead of falling through and crashing later at ./server.
    printf 'Unsupported OS: %s (no prebuilt llama.cpp binary)\n' "$(uname -s)" >&2
    exit 1
    ;;
esac
# Launch the llama.cpp server in interactive mode with the model locked
# in RAM. The trailing backslashes are required on EVERY continued line:
# without them the --override-kv options would run as separate (failing)
# commands and never reach the server.
# -m must point at the FIRST split file actually downloaded above
# (DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf); llama.cpp discovers the
# remaining -of-00005 shards automatically.
./server \
  -m DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf \
  -c 4096 \
  -i \
  --mlock \
  --override-kv deepseek2.attention.q_lora_rank=int:1536 \
  --override-kv deepseek2.attention.kv_lora_rank=int:512 \
  --override-kv deepseek2.expert_shared_count=int:2 \
  --override-kv deepseek2.expert_feed_forward_length=int:1536 \
  --override-kv deepseek2.leading_dense_block_count=int:1