ArtusDev's picture
Upload folder using huggingface_hub
25d681b verified
|
raw
history blame
18.5 kB
---
license: mit
datasets:
- zerofata/Instruct-Anime
- zerofata/Roleplay-Anime-Characters
- zerofata/Instruct-Anime-CreativeWriting
- zerofata/Summaries-Anime-FandomPages
base_model:
- zai-org/GLM-4.5-Air
---
<style>
/* Card root: declares the theme tokens consumed by every nested rule and draws the outer frame. */
.container {
  /* Accent colours and their translucent glow variants */
  --primary-accent: #2B8CCC;
  --secondary-accent: #87CEEB;
  --glow-primary: rgba(43, 140, 204, 0.4);
  --glow-secondary: rgba(135, 206, 235, 0.6);
  /* Surfaces and borders */
  --bg-main: #F8FEFF;
  --bg-container: #FFFFFF;
  --bg-card: rgba(240, 248, 255, 0.9);
  --white: #FFFFFF;
  --border-color: #B0E0E6;
  /* Text colours */
  --text-main: #2C3E50;
  --text-muted: #546E7A;
  /* Font stacks */
  --font-title: 'Inter', sans-serif;
  --font-body: 'Source Sans Pro', sans-serif;
  --font-code: 'JetBrains Mono', monospace;
  /* Box geometry: centred column, at least one viewport tall minus the vertical margins */
  max-width: 1100px;
  min-height: calc(100vh - 40px);
  margin: 20px auto;
  padding: 25px;
  border: 2px solid var(--border-color);
  border-radius: 12px;
  /* Base typography inherited by the card content */
  font-family: var(--font-body);
  font-weight: 400;
  line-height: 1.6;
  color: var(--text-main);
  /* Layered background: a diagonal wash over two soft corner highlights */
  background-color: var(--bg-main);
  background-image:
    linear-gradient(135deg, rgba(240, 248, 255, 0.9), rgba(255, 255, 255, 0.7)),
    radial-gradient(circle at 20% 80%, rgba(135, 206, 235, 0.1) 0%, transparent 50%),
    radial-gradient(circle at 80% 20%, rgba(176, 224, 230, 0.15) 0%, transparent 50%);
  box-shadow: 0 8px 32px rgba(43, 140, 204, 0.15), 0 2px 8px rgba(135, 206, 235, 0.1);
}
/* Title banner: frosted panel that hosts the decorative ::before / ::after layers. */
.container .title-container {
  position: relative; /* positioning context for the absolutely placed pseudo-elements */
  overflow: hidden;   /* clips the decorative layers to the rounded panel */
  margin-bottom: 30px;
  padding: 35px;
  text-align: center;
  background: linear-gradient(135deg, rgba(255, 255, 255, 0.95), rgba(240, 248, 255, 0.9));
  /* Prefixed form first for WebKit builds that only understand it; the
     standard property follows so it wins wherever both are supported. */
  -webkit-backdrop-filter: blur(10px);
  backdrop-filter: blur(10px);
  border: 2px solid var(--border-color);
  border-radius: 16px;
  box-shadow:
    0 8px 32px rgba(43, 140, 204, 0.12),
    inset 0 1px 0 rgba(255, 255, 255, 0.8);
}
/* FLAIR: Dense 24-Spoke Snowflake */
/* 350px square centred in the title banner. The spoke pattern is a single
   30deg period (two colour peaks) repeated around the circle, i.e. 12 x 2 = 24
   spokes, written with repeating-conic-gradient instead of listing every stop. */
.container .title-container::before {
  content: '';
  position: absolute;
  top: 50%;
  left: 50%;
  width: 350px;
  height: 350px;
  margin: -175px 0 0 -175px; /* pull back by half the size to centre on the 50%/50% anchor */
  background-image:
    /* thin ring at roughly 36% of the gradient radius */
    radial-gradient(circle at center, transparent 35%, rgba(135, 206, 235, 0.25) 36%, transparent 37%),
    /* one period: peak A -> clear -> peak B -> clear -> peak A */
    repeating-conic-gradient(from 0deg,
      rgba(176, 224, 230, 0.18) 0deg,
      transparent 7.5deg,
      rgba(135, 206, 235, 0.15) 15deg,
      transparent 22.5deg,
      rgba(176, 224, 230, 0.18) 30deg
    );
  /* fade the pattern out towards the edge of the square */
  mask: radial-gradient(circle at center, black 65%, transparent 75%);
  -webkit-mask: radial-gradient(circle at center, black 65%, transparent 75%);
  z-index: 1;
  pointer-events: none;
}
/* Inner accent ring: a 180px square centred in the title banner, painted with
   faint primary-accent spokes and masked down to an annulus. */
.container .title-container::after {
content: '';
position: absolute;
top: 50%;
left: 50%;
width: 180px;
height: 180px;
/* negative margins of half the box size centre it on the 50%/50% anchor */
margin: -90px 0 0 -90px;
/* Colour peaks sit at 5deg and then at 35deg..335deg in 30deg steps. Each peak
   fades to transparent 5deg later; every peak after the first ramps in over the
   preceding 25deg, while the first ramps in over only 5deg. Nothing follows the
   last stop, so 340deg..360deg stays transparent. The list is therefore NOT a
   clean 30deg repeat - keep the explicit stops. */
background: conic-gradient(from 0deg,
transparent 0deg,
rgba(43, 140, 204, 0.12) 5deg,
transparent 10deg,
rgba(43, 140, 204, 0.12) 35deg,
transparent 40deg,
rgba(43, 140, 204, 0.12) 65deg,
transparent 70deg,
rgba(43, 140, 204, 0.12) 95deg,
transparent 100deg,
rgba(43, 140, 204, 0.12) 125deg,
transparent 130deg,
rgba(43, 140, 204, 0.12) 155deg,
transparent 160deg,
rgba(43, 140, 204, 0.12) 185deg,
transparent 190deg,
rgba(43, 140, 204, 0.12) 215deg,
transparent 220deg,
rgba(43, 140, 204, 0.12) 245deg,
transparent 250deg,
rgba(43, 140, 204, 0.12) 275deg,
transparent 280deg,
rgba(43, 140, 204, 0.12) 305deg,
transparent 310deg,
rgba(43, 140, 204, 0.12) 335deg,
transparent 340deg
);
/* mask shows only a band: hidden up to 25%, fully visible 30%-40%, hidden from 45% */
mask: radial-gradient(circle at center, transparent 25%, black 30%, black 40%, transparent 45%);
-webkit-mask: radial-gradient(circle at center, transparent 25%, black 30%, black 40%, transparent 45%);
/* same stacking level as ::before; .title-wrapper uses z-index 2 */
z-index: 1;
/* purely decorative: never intercept pointer input */
pointer-events: none;
}
/* Lifts the title text above the decorative pseudo-elements (both at z-index 1). */
.container .title-container .title-wrapper {
  z-index: 2;
  position: relative; /* required for z-index to take effect */
}
/* Main heading: large uppercase text filled with an accent gradient. */
.container .title-main {
  display: block;
  position: relative;
  margin: 0;
  font-family: var(--font-title);
  font-size: 3.2rem;
  font-weight: 900;
  letter-spacing: 4px;
  text-transform: uppercase;
  /* Plain text colour; the glyph fill is replaced by the transparent
     -webkit-text-fill-color below so the clipped gradient shows through. */
  color: var(--text-main);
  /* The clip declarations must follow the `background` shorthand, which resets background-clip. */
  background: linear-gradient(135deg, var(--primary-accent), var(--secondary-accent));
  -webkit-background-clip: text;
  background-clip: text;
  -webkit-text-fill-color: transparent;
  /* Glow and depth effects */
  filter: drop-shadow(0 4px 8px rgba(43, 140, 204, 0.4)) drop-shadow(0 2px 4px rgba(255, 255, 255, 0.6));
  text-shadow:
    0 0 20px rgba(255, 255, 255, 0.8),
    0 0 40px rgba(135, 206, 235, 0.6),
    0 4px 8px rgba(43, 140, 204, 0.3);
}
/* Lighter gradient text fill. NOTE(review): no element in the markup visible
   in this file carries this class - confirm whether it is still needed. */
.container .lemonade-text {
  /* clip declarations follow the `background` shorthand, which resets background-clip */
  background: linear-gradient(135deg, var(--secondary-accent), #B0E0E6);
  -webkit-background-clip: text;
  background-clip: text;
  -webkit-text-fill-color: transparent;
  filter: drop-shadow(0 2px 4px rgba(135, 206, 235, 0.3));
}
/* Subtitle row and its text. NOTE(review): neither class appears in the markup
   visible in this file - confirm whether they are still needed. */
.container .title-subtitle {
  margin-top: 15px;
  padding-left: 0;
}
.container .subtitle-text {
  font-family: var(--font-body);
  font-size: 1.2rem;
  font-style: italic;
  font-weight: 400;
  letter-spacing: 2px;
  text-transform: uppercase;
  color: var(--text-muted);
  opacity: 0.8;
}
/* Images inside the card: never wider than the column, framed and softly shadowed. */
.container img {
  max-width: 100%;
  margin-bottom: 40px;
  border: 3px solid var(--border-color);
  border-radius: 12px;
  box-shadow:
    0 12px 24px rgba(43, 140, 204, 0.15),
    0 4px 8px rgba(135, 206, 235, 0.1);
}
/* Section card: translucent panel wrapping one titled section. */
.container .section-container {
  margin-bottom: 30px;
  padding: 25px;
  border: 1px solid var(--border-color);
  border-radius: 12px;
  background: rgba(255, 255, 255, 0.6);
  box-shadow: 0 4px 16px rgba(43, 140, 204, 0.08);
}
/* Drop the trailing gap when the section is the last element of its type in the parent. */
.container .section-container:last-of-type {
  margin-bottom: 0;
}
/* Header strip: title row separated from the content by a rule. */
.container .section-header {
  display: flex;
  align-items: center;
  margin-bottom: 20px;
  padding: 0 0 20px;
  border-bottom: 2px solid var(--border-color);
}
/* Section heading: gradient-filled uppercase text with its own underline bar.
   The !important flags are kept exactly as in the original declarations. */
.container .section-title {
  display: inline-block; /* shrink to the text so the ::after bar matches its width */
  position: relative;    /* positioning context for the ::after bar */
  margin: 0 !important;
  padding: 0 0 10px !important;
  border: none !important;
  font-family: var(--font-title);
  font-size: 1.4rem;
  font-weight: 700;
  letter-spacing: 1px;
  text-transform: uppercase;
  /* clip declarations follow the `background` shorthand, which resets background-clip */
  background: linear-gradient(45deg, var(--secondary-accent), var(--primary-accent));
  -webkit-background-clip: text;
  background-clip: text;
  -webkit-text-fill-color: transparent;
}
/* Gradient underline sitting 2px below the heading's padding box. */
.container .section-title::after {
  content: '';
  position: absolute;
  left: 0;
  bottom: -2px;
  width: 100%;
  height: 3px;
  border-radius: 2px;
  background-image: linear-gradient(to right, var(--secondary-accent), var(--primary-accent));
}
/* Section body: no extra padding beyond the card's own. */
.container .section-content {
  padding: 0;
}
/* Sub-section heading: accent-coloured uppercase label with a solid underline. */
.container .subheading {
  display: block;
  margin-top: 25px;
  margin-bottom: 15px;
  padding-bottom: 8px;
  border-bottom: 2px solid var(--primary-accent);
  font-family: var(--font-title);
  font-size: 1.2rem;
  font-weight: 600;
  letter-spacing: 1px;
  text-transform: uppercase;
  color: var(--primary-accent);
}
/* Key/value panel with a thicker accent stripe on its left edge. */
.container .data-box {
  margin-bottom: 20px;
  padding: 20px;
  font-size: 1rem;
  background-color: var(--bg-card);
  border: 2px solid var(--border-color);
  border-left: 4px solid var(--primary-accent); /* must follow `border` to override its left side */
  border-radius: 8px;
  box-shadow: 0 4px 12px rgba(43, 140, 204, 0.1);
}
/* One row inside the panel: arrow, label and value laid out on a flex line. */
.container .data-row {
  display: flex;
  align-items: center;
  margin-bottom: 6px;
  padding: 5px 0;
}
.container .data-row:last-child {
  margin-bottom: 0;
}
/* Leading arrow glyph of a row. */
.container .data-arrow {
  margin-right: 12px;
  font-family: var(--font-code);
  font-size: 1.1rem;
  font-weight: bold;
  color: var(--primary-accent);
}
/* Row label; the minimum width keeps the values of short labels aligned. */
.container .data-label {
  min-width: 90px;
  margin-right: 10px;
  font-family: var(--font-body);
  font-weight: 600;
  color: var(--text-main);
}
/* Links: accent-coloured, no underline, with a small lift and colour shift on hover. */
.container a {
  /* CSS transforms do not apply to non-replaced inline boxes, so without this
     the hover translateY below is a no-op for links that sit in running text.
     Links that are flex items (inside .data-row) are unaffected by this value. */
  display: inline-block;
  color: var(--primary-accent);
  text-decoration: none;
  font-weight: 600;
  transition: all .2s;
}
/* Links inside data rows get a dotted underline drawn as a border. */
.container .data-row a {
  border-bottom: 1px dotted var(--primary-accent);
}
.container a:hover {
  text-decoration: none;
  color: var(--secondary-accent);
  transform: translateY(-1px);
}
/* The dotted underline turns solid on hover. */
.container .data-row a:hover {
  border-bottom-style: solid;
}
/* Collapsible block (<details>/<summary>) used for the training configs. */
.container .dropdown-container {
  margin-top: 20px;
}
/* Clickable summary line; the native disclosure marker is hidden because the
   markup supplies its own arrow via .dropdown-arrow. */
.container .dropdown-summary {
  cursor: pointer;
  padding: 10px 0;
  color: var(--text-muted);
  font-size: 1.1rem;
  font-weight: 700;
  text-transform: none;
  font-family: var(--font-title);
  letter-spacing: 1px;
  list-style: none;
  transition: color 0.2s ease;
}
/* WebKit builds that render the triangle through this pseudo-element ignore
   `list-style: none`; hide it explicitly so only the custom arrow is shown. */
.container .dropdown-summary::-webkit-details-marker {
  display: none;
}
.container .dropdown-summary:hover {
  color: var(--primary-accent);
}
/* Custom arrow glyph placed before the summary text. */
.container .dropdown-arrow {
  color: var(--secondary-accent);
  margin-right: 10px;
  transition: transform 0.2s ease;
}
/* Panel that holds the expanded dropdown body. */
.container .dropdown-content {
  margin-top: 15px;
  padding: 25px;
  border: 2px solid var(--border-color);
  border-radius: 8px;
  background-color: var(--bg-card);
  box-shadow: 0 4px 12px rgba(43, 140, 204, 0.1);
}
/* Small uppercase caption placed above a config listing. */
.container .config-title {
  margin-bottom: 10px;
  font-family: var(--font-body);
  font-size: 1rem;
  font-weight: 700;
  letter-spacing: 1px;
  text-transform: uppercase;
  color: var(--text-muted);
}
/* Inline code: monospace accent text on a tinted chip. */
.container code {
  padding: 3px 6px;
  border-radius: 4px;
  font-family: var(--font-code);
  color: var(--primary-accent);
  background: rgba(176, 224, 230, 0.2);
}
/* Code listing box; long lines wrap instead of overflowing the card. */
.container pre {
  padding: 20px;
  border: 2px solid var(--border-color);
  border-radius: 8px;
  color: var(--text-main);
  background-color: #f8f9fa;
  box-shadow: inset 0 2px 4px rgba(43, 140, 204, 0.1);
  white-space: pre-wrap;
  word-wrap: break-word;
}
/* Code inside a listing drops the inline chip styling. This selector is more
   specific than `.container code`, so it wins regardless of rule order. */
.container pre code {
  padding: 0;
  border-radius: 0;
  color: inherit;
  background: none;
}
</style>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Iceblink</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<!-- Requested weights cover what the stylesheet uses: Inter 900 (.title-main), Source Sans Pro 700 (.config-title), JetBrains Mono 700 (.data-arrow). -->
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800;900&family=Source+Sans+Pro:ital,wght@0,400;0,600;0,700;1,400&family=JetBrains+Mono:wght@400;500;700&display=swap" rel="stylesheet">
</head>
<body>
<div class="container">
<div class="title-container">
<div class="glitchy-overlay"></div>
<div class="title-wrapper">
<h1 class="title-main">
<span class="title-prefix">ICEBLINK</span>
</h1>
</div>
</div>
<img src="https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/s4Z-P5He2Z3hXD9XtYfwy.png" alt="image/png">
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Overview</h2>
</div>
<div class="section-content">
<p>An experimental GLM4.5 Air finetune.</p>
<p>Had this one in the works for a while, but was struggling to find the right hyperparams to get this model to behave nicely. Thank you to TheDrummer for helping me out with them.</p>
<p>This model is a creative writing and RP model. It's pretty verbose. The intent is to keep the behavior of the original model, but to slightly improve writing, dialogue & creativity.</p>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">SillyTavern Settings</h2>
</div>
<div class="section-content">
<h3 class="subheading">Recommended Roleplay Format</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Actions:</span>
<span>In plaintext</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Dialogue:</span>
<span>"In quotes"</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Thoughts:</span>
<span>*In asterisks*</span>
</div>
</div>
<h3 class="subheading">Recommended Samplers</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Temp:</span>
<span>0.8</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">MinP:</span>
<span>0.05</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">TopP:</span>
<span>0.95</span>
</div>
</div>
<h3 class="subheading">Instruct</h3>
<div class="data-box">
<p style="margin: 0;">GLM4.5 (no thinking): <a href="https://huggingface.co/zerofata/GLM-4.5-Iceblink-106B-A12B/raw/main/GLM45-NoThink-SillyTavern-Preset.json">SillyTavern Preset</a></p>
</div>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Quantizations</h2>
</div>
<div class="section-content">
<div style="margin-bottom: 20px;">
<h3 class="subheading">GGUF</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/bartowski/zerofata_GLM-4.5-Iceblink-106B-A12B-GGUF">iMatrix (bartowski)</a>
</div>
</div>
</div>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Creation Process</h2>
</div>
<div class="section-content">
<p>Creation Process: SFT</p>
<p>SFT on approx 10 million tokens, SFW / NSFW RP, stories, creative instruct & chat data.</p>
<p>MoEs are brutal to train even with a small dataset like mine, so I took a different approach from usual. I used a very low LR in an effort to avoid having to apply DPO / KTO training afterwards.</p>
<p>I think there's likely a better config to be found, but experimentation with the model to find it is quite draining.</p>
<div class="dropdown-container">
<details>
<summary class="dropdown-summary">
<span class="dropdown-arrow">></span>
Axolotl configs
</summary>
<div class="dropdown-content">
<p>Not optimized for cost / performance efficiency, YMMV.</p>
<div class="config-title">SFT (4*H200)</div>
<pre><code>base_model: zai-org/GLM-4.5-Air
eot_tokens:
- "&lt;|user|&gt;"
- "&lt;|endoftext|&gt;"
special_tokens:
eos_token: "&lt;|user|&gt;"
<br>
&#35; ====================
&#35; DATASET CONFIGURATION
&#35; ====================
datasets:
- path: ./data/dataset.jsonl
type: chat_template
split: train
field_messages: messages
message_property_mappings:
role: role
content: content
roles:
user: ["user"]
assistant: ["assistant"]
system: ["system"]
<br>
dataset_prepared_path: ./last_run_prepared
train_on_inputs: false &#35; Only train on assistant responses
eval_sample_packing: False
<br>
&#35; ====================
&#35; QLORA CONFIGURATION
&#35; ====================
adapter: qlora
load_in_4bit: true
lora_r: 32
lora_alpha: 32
lora_dropout: 0.1
lora_target_modules:
- gate_proj
- down_proj
- up_proj
- q_proj
- v_proj
- k_proj
- o_proj
&#35; lora_modules_to_save: &#35; Uncomment only if you added NEW tokens
<br>
&#35; ====================
&#35; TRAINING PARAMETERS
&#35; ====================
num_epochs: 3
micro_batch_size: 2
gradient_accumulation_steps: 4
learning_rate: 4.5e-6
optimizer: paged_adamw_8bit
lr_scheduler: rex
warmup_ratio: 0.05
weight_decay: 0.01
max_grad_norm: 1.0
val_set_size: 0.02
<br>
&#35; ====================
&#35; SEQUENCE &amp; PACKING
&#35; ====================
sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
<br>
&#35; ====================
&#35; HARDWARE OPTIMIZATIONS
&#35; ====================
bf16: auto
flash_attention: true
gradient_checkpointing: true
<br>
plugins:
- axolotl.integrations.liger.LigerPlugin
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
liger_rope: false
liger_rms_norm: true
liger_layer_norm: true
liger_glu_activation: true
liger_fused_linear_cross_entropy: true
cut_cross_entropy: false
<br>
deepspeed: deepspeed_configs/zero1.json
<br>
&#35; ====================
&#35; EVALUATION &amp; CHECKPOINTING
&#35; ====================
save_strategy: steps
save_steps: 20
eval_steps: 35
save_total_limit: 18 &#35; Keep best + last few checkpoints
load_best_model_at_end: true
metric_for_best_model: eval_loss
greater_is_better: false
<br>
&#35; ====================
&#35; LOGGING &amp; OUTPUT
&#35; ====================
output_dir: ./GLM-AIR-SFT_v2-5
logging_steps: 1
save_safetensors: true
<br>
&#35; ====================
&#35; WANDB TRACKING
&#35; ====================
wandb_project: GLM-AIR-SFT
&#35; wandb_entity: your_entity
wandb_name: GLM-AIR-SFT_v2-5</code></pre>
</div>
</details>
</div>
</div>
</div>
</div>
</body>
</html>