zerofata's picture
Update README.md
1ad2576 verified
---
license: mit
base_model:
- zerofata/MS3.2-PaintedFantasy-Visage-v4-34B
---
<style>
@keyframes fallingleaf {
0% { transform: translateY(-20px) rotate(0deg); opacity: 1; }
100% { transform: translateY(100vh) rotate(360deg); opacity: 0.3; }
}
.container {
--primary-accent: #7BC4C4;
--secondary-accent: #9FD4D4;
--accent-warm: #E8A5B5;
--red-deep: #2C5F6F;
--gold-bright: #B4DDD4;
--ink-dark: #000000;
--bg-main: #0A1A1A;
--bg-container: #0F2525;
--bg-card: rgba(15, 30, 35, 0.95);
--text-main: #D5E8E8;
--text-muted: #9CB9C4;
--white: #FFFFFF;
--border-color: #6B9BAA;
--font-title: 'Inter', sans-serif;
--font-body: 'Source Sans Pro', sans-serif;
--font-code: 'JetBrains Mono', monospace;
font-family: var(--font-body);
color: var(--text-main);
line-height: 1.7;
font-weight: 400;
max-width: 1100px;
margin: 20px auto;
padding: 50px 40px;
background-color: var(--bg-main);
background-image:
radial-gradient(ellipse at top, rgba(20, 45, 60, 0.3), transparent 60%),
radial-gradient(ellipse at bottom, rgba(0, 0, 0, 0.8), transparent 50%),
linear-gradient(180deg,
rgba(15, 30, 40, 0.6) 0%,
rgba(12, 25, 35, 0.7) 25%,
rgba(10, 20, 28, 0.8) 50%,
rgba(8, 16, 22, 0.85) 75%,
rgba(6, 12, 18, 0.9) 100%);
min-height: calc(100vh - 40px);
position: relative;
z-index: 2;
border-radius: 3px;
box-shadow:
0 10px 60px rgba(0, 0, 0, 0.8),
0 2px 10px rgba(0, 0, 0, 0.9),
inset 0 0 120px rgba(20, 40, 55, 0.1);
border: 2px solid rgba(30, 60, 75, 0.4);
}
.container::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-image:
radial-gradient(circle 3px at 15% 12%, rgba(123, 196, 196, 0.4) 0%, transparent 100%),
radial-gradient(circle 2px at 25% 18%, rgba(159, 212, 212, 0.35) 0%, transparent 100%),
radial-gradient(circle 4px at 45% 25%, rgba(232, 165, 181, 0.4) 0%, transparent 100%),
radial-gradient(circle 3px at 65% 15%, rgba(123, 196, 196, 0.4) 0%, transparent 100%),
radial-gradient(circle 2px at 85% 20%, rgba(196, 240, 234, 0.35) 0%, transparent 100%),
radial-gradient(circle 3px at 35% 35%, rgba(232, 165, 181, 0.35) 0%, transparent 100%),
radial-gradient(circle 2px at 75% 40%, rgba(180, 221, 221, 0.35) 0%, transparent 100%),
radial-gradient(circle 4px at 20% 50%, rgba(123, 196, 196, 0.45) 0%, transparent 100%),
radial-gradient(circle 3px at 90% 55%, rgba(196, 240, 234, 0.4) 0%, transparent 100%),
radial-gradient(circle 2px at 50% 60%, rgba(232, 165, 181, 0.3) 0%, transparent 100%);
pointer-events: none;
border-radius: 3px;
}
.container .title-container {
background:
radial-gradient(circle at 15% 25%, rgba(30, 65, 80, 0.4) 0%, transparent 40%),
radial-gradient(circle at 85% 70%, rgba(123, 196, 196, 0.2) 0%, transparent 45%),
radial-gradient(circle at 50% 50%, rgba(20, 45, 60, 0.35) 0%, transparent 60%),
linear-gradient(135deg,
rgba(20, 45, 60, 0.3) 0%,
rgba(15, 35, 50, 0.4) 50%,
rgba(12, 28, 40, 0.5) 100%);
margin-bottom: 50px;
border: 3px solid;
border-image: linear-gradient(135deg,
rgba(123, 196, 196, 0.5) 0%,
rgba(159, 212, 212, 0.6) 50%,
rgba(123, 196, 196, 0.5) 100%) 1;
border-radius: 0;
padding: 60px 50px 70px;
text-align: center;
position: relative;
overflow: hidden;
box-shadow:
0 10px 50px rgba(0, 0, 0, 0.7),
0 4px 20px rgba(0, 0, 0, 0.8),
inset 0 2px 8px rgba(123, 196, 196, 0.15);
}
.container .title-container::before {
content: '';
position: absolute;
top: -50%;
right: -10%;
width: 300px;
height: 300px;
background: radial-gradient(circle, rgba(44, 95, 111, 0.3) 0%, transparent 70%);
border-radius: 40% 60% 70% 30%;
filter: blur(40px);
}
.container .title-container::after {
content: '';
position: absolute;
bottom: -30%;
left: -5%;
width: 250px;
height: 250px;
background: radial-gradient(circle, rgba(232, 165, 181, 0.2) 0%, transparent 70%);
border-radius: 60% 40% 30% 70%;
filter: blur(35px);
}
.container .title-container .title-wrapper {
position: relative;
}
.container .title-main {
font-size: 3.5rem;
font-weight: 800;
margin: 0;
letter-spacing: 6px;
text-transform: uppercase;
font-family: var(--font-title);
position: relative;
line-height: 1.2;
z-index: 1;
}
.container .title-main .title-prefix {
display: block;
background: linear-gradient(135deg, #6EC5C5 0%, #E8A5B5 35%, #9FD4D4 70%, #B4E8DD 100%);
background-clip: text;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
filter: drop-shadow(0 3px 12px rgba(110, 197, 197, 0.6)) drop-shadow(0 0 20px rgba(232, 165, 181, 0.4));
margin-bottom: 15px;
font-size: 3.5rem;
letter-spacing: 6px;
font-weight: 700;
position: relative;
padding-bottom: 12px;
font-family: 'Cinzel', serif;
text-transform: uppercase;
}
.container .title-main .title-prefix::after {
content: '';
position: absolute;
bottom: 0;
left: 50%;
transform: translateX(-50%);
width: 80px;
height: 4px;
background: linear-gradient(90deg,
transparent 0%,
rgba(159, 212, 212, 0.8) 35%,
rgba(232, 165, 181, 0.8) 65%,
transparent 100%);
border-radius: 2px;
box-shadow: 0 0 10px rgba(232, 165, 181, 0.5);
}
.container .title-main .title-version {
display: inline-block;
background: linear-gradient(135deg, #8DD4D4 0%, #A8E0E0 50%, #C4F0EA 100%);
background-clip: text;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
filter: drop-shadow(0 2px 8px rgba(141, 212, 212, 0.6)) drop-shadow(0 0 15px rgba(168, 224, 224, 0.5));
font-size: 2.5rem;
letter-spacing: 4px;
font-weight: 700;
position: relative;
padding: 5px 15px;
border: 2px solid transparent;
border-image: linear-gradient(135deg,
rgba(141, 212, 212, 0.4),
rgba(168, 224, 224, 0.6),
rgba(141, 212, 212, 0.4)) 1;
font-family: 'Cinzel', serif;
text-transform: uppercase;
}
.container .title-main .title-version::before {
content: '';
position: absolute;
top: -8px;
left: -8px;
right: -8px;
bottom: -8px;
background: linear-gradient(135deg, rgba(141, 212, 212, 0.15), rgba(44, 95, 111, 0.1));
z-index: -1;
transform: skew(-2deg);
}
.container .lemonade-text {
background: linear-gradient(135deg, var(--red-deep), var(--primary-accent));
background-clip: text;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
filter: drop-shadow(0 2px 6px rgba(232, 107, 142, 0.6));
}
.container .title-subtitle {
padding-left: 0;
margin-top: 15px;
}
.container .subtitle-text {
color: var(--text-muted);
font-size: 1.2rem;
font-family: var(--font-body);
font-style: italic;
font-weight: 400;
letter-spacing: 2px;
text-transform: uppercase;
opacity: 0.9;
text-shadow: 0 0 10px rgba(159, 212, 212, 0.3);
}
.container img {
max-width: 100%;
border: 2px solid rgba(44, 95, 111, 0.5);
margin-bottom: 40px;
box-shadow:
0 8px 30px rgba(0, 0, 0, 0.7),
0 3px 12px rgba(123, 196, 196, 0.4),
0 0 20px rgba(232, 165, 181, 0.3);
border-radius: 2px;
position: relative;
}
.container img::after {
content: '';
position: absolute;
inset: -5px;
background: linear-gradient(135deg,
rgba(123, 196, 196, 0.2),
rgba(232, 165, 181, 0.15));
filter: blur(8px);
z-index: -1;
}
.container .section-container {
margin-bottom: 35px;
padding: 30px;
background: linear-gradient(135deg,
rgba(25, 55, 70, 0.85) 0%,
rgba(30, 65, 80, 0.9) 50%,
rgba(25, 55, 70, 0.85) 100%);
border: 2px solid transparent;
border-image: linear-gradient(135deg,
rgba(123, 196, 196, 0.6),
rgba(159, 212, 212, 0.7),
rgba(123, 196, 196, 0.6)) 1;
border-radius: 0;
box-shadow:
0 8px 35px rgba(0, 0, 0, 0.7),
inset 0 2px 8px rgba(123, 196, 196, 0.3),
0 0 25px rgba(123, 196, 196, 0.3),
inset 0 -1px 20px rgba(159, 212, 212, 0.12);
position: relative;
}
.container .section-container::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background:
radial-gradient(circle at 20% 30%, rgba(123, 196, 196, 0.25) 0%, transparent 50%),
radial-gradient(circle at 80% 70%, rgba(159, 212, 212, 0.15) 0%, transparent 50%),
radial-gradient(circle at 50% 10%, rgba(232, 165, 181, 0.1) 0%, transparent 60%);
pointer-events: none;
z-index: 0;
}
.container .section-container:last-of-type {
margin-bottom: 0;
}
.container .section-header {
display: flex;
align-items: center;
padding: 0 0 18px 0;
border-bottom: 2px solid rgba(123, 196, 196, 0.4);
margin-bottom: 25px;
position: relative;
z-index: 1;
}
.container .section-title {
font-family: var(--font-title);
background: linear-gradient(135deg,
#9FD4D4 0%,
#B4E8DD 100%);
background-clip: text;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 1.5rem;
margin: 0 !important;
padding: 0 !important;
letter-spacing: 2px;
font-weight: 700;
text-transform: uppercase;
border: none !important;
position: relative;
display: inline-block;
filter: drop-shadow(0 2px 4px rgba(123, 196, 196, 0.5));
}
.container .section-title::after {
content: '';
position: absolute;
bottom: -18px;
left: 0;
width: 50%;
height: 2px;
background-image: linear-gradient(to right,
rgba(159, 212, 212, 0.7),
transparent);
opacity: 0.8;
}
.container .section-content {
padding: 0;
position: relative;
z-index: 1;
}
.container .subheading {
color: var(--secondary-accent);
font-size: 1.25rem;
margin-top: 28px;
margin-bottom: 18px;
font-weight: 600;
display: block;
text-transform: uppercase;
letter-spacing: 1.5px;
font-family: var(--font-title);
border-bottom: 2px solid transparent;
border-image: linear-gradient(to right,
var(--primary-accent),
var(--secondary-accent),
transparent) 1;
padding-bottom: 10px;
text-shadow: 0 0 10px rgba(159, 212, 212, 0.4);
}
.container .data-box {
background: linear-gradient(135deg,
rgba(20, 40, 50, 0.6) 0%,
rgba(25, 50, 60, 0.7) 100%);
padding: 22px;
border: 2px solid rgba(107, 155, 170, 0.3);
border-left: 5px solid var(--primary-accent);
margin-bottom: 20px;
box-shadow:
0 4px 15px rgba(0, 0, 0, 0.5),
inset 0 1px 2px rgba(44, 95, 111, 0.3),
0 0 10px rgba(232, 107, 142, 0.15);
border-radius: 0;
font-size: 1rem;
position: relative;
}
.container .data-box::after {
content: '';
position: absolute;
top: 0;
right: 0;
width: 100px;
height: 100px;
background: radial-gradient(circle at top right, rgba(123, 196, 196, 0.08) 0%, transparent 60%);
pointer-events: none;
border-radius: 0 0 0 100%;
}
.container .data-row {
display: flex;
align-items: center;
margin-bottom: 6px;
padding: 5px 0;
}
.container .data-row:last-child {
margin-bottom: 0;
}
.container .data-arrow {
color: var(--secondary-accent);
font-weight: bold;
margin-right: 12px;
font-family: var(--font-code);
font-size: 1.1rem;
text-shadow: 0 0 8px rgba(159, 212, 212, 0.5);
}
.container .data-label {
color: var(--text-main);
font-weight: 600;
font-family: var(--font-body);
margin-right: 10px;
min-width: 90px;
}
.container a {
color: var(--primary-accent);
text-decoration: none;
font-weight: 600;
transition: all .2s;
text-shadow: 0 0 10px rgba(232, 107, 142, 0.4);
}
.container .data-row a {
border-bottom: 1px dotted var(--primary-accent);
}
.container a:hover {
text-decoration: none;
color: var(--secondary-accent);
transform: translateY(-1px);
filter: drop-shadow(0 2px 6px rgba(159, 212, 212, 0.6));
text-shadow: 0 0 15px rgba(159, 212, 212, 0.8);
}
.container .data-row a:hover {
border-bottom-style: solid;
border-bottom-color: var(--secondary-accent);
}
.container .dropdown-container {
margin-top: 20px;
}
.container .dropdown-summary {
cursor: pointer;
padding: 10px 0;
color: var(--text-muted);
font-size: 1.1rem;
font-weight: 700;
text-transform: none;
font-family: var(--font-title);
letter-spacing: 1px;
list-style: none;
transition: color 0.2s ease;
}
.container .dropdown-summary:hover {
color: var(--primary-accent);
}
.container .dropdown-arrow {
color: var(--primary-accent);
margin-right: 10px;
transition: transform 0.2s ease;
}
.container .dropdown-content {
margin-top: 15px;
padding: 25px;
background: linear-gradient(135deg,
rgba(15, 30, 35, 0.8) 0%,
rgba(20, 40, 50, 0.85) 100%);
border: 2px solid rgba(107, 155, 170, 0.3);
border-radius: 0;
box-shadow:
0 4px 15px rgba(0, 0, 0, 0.6),
inset 0 1px 3px rgba(44, 95, 111, 0.3),
0 0 10px rgba(232, 107, 142, 0.15);
}
.container .config-title {
color: var(--secondary-accent);
font-size: 1rem;
margin-bottom: 10px;
font-family: var(--font-body);
text-transform: uppercase;
letter-spacing: 1px;
font-weight: 700;
text-shadow: 0 0 10px rgba(159, 212, 212, 0.4);
}
.container pre {
background: linear-gradient(135deg,
rgba(20, 5, 5, 0.9) 0%,
rgba(30, 8, 8, 0.95) 100%);
padding: 22px;
border: 2px solid rgba(139, 0, 0, 0.3);
white-space: pre-wrap;
word-wrap: break-word;
color: var(--text-main);
border-radius: 0;
box-shadow:
inset 0 2px 6px rgba(0, 0, 0, 0.5),
0 4px 12px rgba(0, 0, 0, 0.6);
}
.container pre code {
background: none;
color: inherit;
padding: 0;
border-radius: 0;
}
.container code {
font-family: var(--font-code);
color: var(--secondary-accent);
background: rgba(159, 212, 212, 0.15);
padding: 3px 7px;
border-radius: 2px;
box-shadow: 0 1px 3px rgba(159, 212, 212, 0.3);
}
body {
background: linear-gradient(180deg,
#0A1F2E 0%,
#0D2838 15%,
#102D3D 30%,
#0D2838 50%,
#0A1F2E 70%,
#071520 85%,
#040D15 100%);
background-attachment: fixed;
margin: 0;
padding: 0;
position: relative;
overflow-x: hidden;
}
body::before {
content: '';
position: fixed;
left: -10%;
bottom: 0;
width: 25%;
height: 100%;
background-image:
radial-gradient(ellipse 80px 200px at 20% 90%, rgba(0, 0, 0, 0.95), transparent),
radial-gradient(ellipse 100px 300px at 15% 85%, rgba(0, 0, 0, 0.9), transparent),
radial-gradient(ellipse 60px 250px at 25% 88%, rgba(0, 0, 0, 0.92), transparent),
radial-gradient(ellipse 70px 180px at 10% 92%, rgba(0, 0, 0, 0.93), transparent),
linear-gradient(to top, rgba(0, 0, 0, 0.8) 0%, transparent 60%);
pointer-events: none;
z-index: 1;
}
body::after {
content: '';
position: fixed;
right: -10%;
bottom: 0;
width: 25%;
height: 100%;
background-image:
radial-gradient(ellipse 90px 220px at 80% 88%, rgba(0, 0, 0, 0.95), transparent),
radial-gradient(ellipse 110px 320px at 85% 83%, rgba(0, 0, 0, 0.9), transparent),
radial-gradient(ellipse 65px 260px at 75% 86%, rgba(0, 0, 0, 0.92), transparent),
radial-gradient(ellipse 80px 190px at 90% 90%, rgba(0, 0, 0, 0.93), transparent),
linear-gradient(to top, rgba(0, 0, 0, 0.8) 0%, transparent 60%);
pointer-events: none;
z-index: 1;
}
.container p {
position: relative;
z-index: 1;
}
</style>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>PaintedFantasy-Visage-v4</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&family=Source+Sans+Pro:ital,wght@0,400;0,600;1,400&family=JetBrains+Mono:wght@400;500&family=Cinzel:wght@400;600;700&display=swap" rel="stylesheet">
</head>
<body>
<div class="container">
<div class="title-container">
<div class="title-wrapper">
<h1 class="title-main">
<span class="title-prefix">Painted Fantasy</span>
<span class="title-version">Visage v4</span>
</h1>
</div>
</div>
![image](https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/aphW6mrYxqywL4YT4iW0f.png)
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Overview</h2>
</div>
<div class="section-content">
<p>Magistral 24B upscaled to 34B.</p>
<p>The latest Magistral model seems pretty good. Has some refreshing prose.</p>
<p>This model is an uncensored, creative writing and RP model. It uses a new (still a work in progress) dataset I've been curating based on real character cards.</p>
<p>Has some structural repetition; at this point it's a calling card of Mistral models. I think it's better than v3 though.</p>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">SillyTavern Settings</h2>
</div>
<div class="section-content">
<h3 class="subheading">Recommended Roleplay Format</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Actions:</span>
<span>In plaintext</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Dialogue:</span>
<span>"In quotes"</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Thoughts:</span>
<span>*In asterisks*</span>
</div>
</div>
<h3 class="subheading">Recommended Samplers</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Temp:</span>
<span>0.8</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">MinP:</span>
<span>0.05</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">TopP:</span>
<span>0.95</span>
</div>
</div>
<h3 class="subheading">Instruct</h3>
<div class="data-box">
<p style="margin: 0;">Mistral v7 Tekken</p>
</div>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Quantizations</h2>
</div>
<div class="section-content">
<div style="margin-bottom: 20px;">
<h3 class="subheading">EXL3</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-v4-34b-exl3-3bpw">3bpw</a>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-v4-34b-exl3-4bpw">4bpw</a>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-v4-34b-exl3-5bpw">5bpw</a>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-v4-34b-exl3-6bpw">6bpw</a>
</div>
</div>
</div>
</div>
<div class="section-content">
<div style="margin-bottom: 20px;">
<h3 class="subheading">GGUF</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/mradermacher/MS3.2-PaintedFantasy-Visage-v4-34B-i1-GGUF">iMatrix (mradermacher)</a>
</div>
</div>
</div>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Creation Process</h2>
</div>
<div class="section-content">
<p>Creation Process: Upscale > CPT > SFT > Merge</p>
<p>After upscaling, was pretrained on approx 100MB of light novels and a subset of DCLM records.</p>
<p>SFT on approx 10 million tokens, SFW / NSFW RP, stories and creative instruct. I've removed some chat data which I think hurt more than helped and replaced it with conversations from real character cards.</p>
<p>Did some experimenting with LoRA methods, particularly DoRA vs rsLoRA. With DoRA the writing was fantastic, but the model wasn't able to handle its own creativity, even with further RLHF applied. With rsLoRA the model took the data far better, but was significantly less adept at writing. Merged the two models together, using the stable (rsLoRA) version as a base, which seems to have successfully combined the positives of both models.</p>
<div class="dropdown-container">
<details>
<summary class="dropdown-summary">
<span class="dropdown-arrow">></span>
Mergekit configs
</summary>
<div class="dropdown-content">
<div class="config-title">Upscale</div>
<pre><code>base_model: Darkhn/Magistral-Small-2509-Text-Only
merge_method: passthrough
dtype: bfloat16
slices:
  - sources:
      - model: Darkhn/Magistral-Small-2509-Text-Only
        layer_range: [0, 29]
  - sources:
      - model: Darkhn/Magistral-Small-2509-Text-Only
        layer_range: [10, 40]</code></pre>
<div class="config-title">Slerp Merge</div>
<pre><code>models:
- model: ApocalypseParty/Magi-PT-2-SFT-1-DPO-3
- model: ApocalypseParty/Magi-PT-2-SFT-2
merge_method: slerp
base_model: ApocalypseParty/Magi-PT-2-SFT-2
parameters:
  t: [0, 0, 0, 0.1, 0.2]
dtype: bfloat16</code></pre>
</div>
</details>
</div>
<div class="dropdown-container">
<details>
<summary class="dropdown-summary">
<span class="dropdown-arrow">></span>
Axolotl configs
</summary>
<div class="dropdown-content">
<div class="config-title">Pretrain (2*H100)</div>
<pre><code>&#35; ====================
&#35; MODEL CONFIGURATION
&#35; ====================
base_model: ApocalypseParty/magistral-34b
model_type: MistralForCausalLM
tokenizer_type: AutoTokenizer
chat_template: mistral_v7_tekken
&#35; ====================
&#35; DATASET CONFIGURATION
&#35; ====================
datasets:
  - path: ./data/text_files_minimal_dataset.jsonl
    type: completion
  - path: ./data/filtered_results.jsonl
    type: completion
<br>
dataset_prepared_path:
train_on_inputs: false &#35; Only train on assistant responses
<br>
&#35; ====================
&#35; QLORA CONFIGURATION
&#35; ====================
adapter: qlora
load_in_4bit: true
lora_r: 64
lora_alpha: 64
lora_dropout: 0.0
lora_target_linear: true
&#35; lora_modules_to_save: &#35; Uncomment only if you added NEW tokens
<br>
&#35; ====================
&#35; TRAINING PARAMETERS
&#35; ====================
num_epochs: 1
micro_batch_size: 4
gradient_accumulation_steps: 1
learning_rate: 3e-5
optimizer: paged_adamw_8bit
lr_scheduler: rex
warmup_ratio: 0.05
weight_decay: 0.0
max_grad_norm: 1.0
<br>
&#35; ====================
&#35; SEQUENCE &amp; PACKING
&#35; ====================
sequence_len: 16384
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
<br>
&#35; ====================
&#35; HARDWARE OPTIMIZATIONS
&#35; ====================
bf16: auto
flash_attention: true
gradient_checkpointing: offload
deepspeed: deepspeed_configs/zero1.json
<br>
plugins:
- axolotl.integrations.liger.LigerPlugin
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
cut_cross_entropy: true
liger_rope: true
liger_rms_norm: true
liger_layer_norm: true
liger_glu_activation: true
liger_cross_entropy: false &#35; Cut Cross Entropy overrides this
liger_fused_linear_cross_entropy: false &#35; Cut Cross Entropy overrides this
<br>
&#35; ====================
&#35; EVALUATION &amp; CHECKPOINTING
&#35; ====================
save_strategy: steps
save_steps: 40
save_total_limit: 5 &#35; Keep best + last few checkpoints
load_best_model_at_end: true
greater_is_better: false
<br>
&#35; ====================
&#35; LOGGING &amp; OUTPUT
&#35; ====================
output_dir: ./Magi-PT-2
logging_steps: 1
save_safetensors: true
<br>
&#35; ====================
&#35; WANDB TRACKING
&#35; ====================
wandb_project: Magi-PT
&#35; wandb_entity: your_entity
wandb_name: Magi-PT-2</code></pre>
<div class="config-title">SFT (2*H100)</div>
<pre><code>base_model: ApocalypseParty/Magi-PT-2
model_type: MistralForCausalLM
tokenizer_type: AutoTokenizer
chat_template: mistral_v7_tekken
<br>
plugins:
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
<br>
load_in_8bit: true
load_in_4bit: false
deepspeed: deepspeed_configs/zero1.json
<br>
datasets:
  - path: ./data/automated_dataset.jsonl
    type: chat_template
  - path: ./data/chub_dataset.jsonl
    type: chat_template
  - path: ./data/handcrafted_dataset.jsonl
    type: chat_template
  - path: ./data/cw_dataset.jsonl
    type: chat_template
  - path: ./data/instruct_dataset.jsonl
    type: chat_template
  - path: ./data/nsfw_stories.jsonl
    type: chat_template
  - path: ./data/stories_dataset.jsonl
    type: chat_template
<br>
dataset_prepared_path: last_run_prepared
val_set_size: 0
output_dir: ./Magi-PT-2-SFT-2
<br>
adapter: lora
peft_use_rslora: true
lora_model_dir:
<br>
sequence_len: 8192
sample_packing: true
<br>
lora_r: 128
lora_alpha: 128
lora_dropout: 0.05
lora_target_linear: true
lora_target_modules:
- gate_proj
- down_proj
- up_proj
- q_proj
- v_proj
- k_proj
- o_proj
<br>
wandb_project: Magi-SFT
wandb_name: Magi-PT-2-SFT-2
<br>
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 3
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 1e-5
<br>
bf16: auto
tf32: false
<br>
gradient_checkpointing: true
resume_from_checkpoint:
logging_steps: 1
flash_attention: true
<br>
warmup_ratio: 0.05
evals_per_epoch: 1
saves_per_epoch: 2</code></pre>
</div>
</details>
</div>
</div>
</div>
</div>
</body>
</html>