123

by demonissyx - opened Jun 18

base: refs/heads/main

←

from: refs/pr/4

Discussion Files changed

+164

-50

Files changed (14) hide show

README.md +8 -13
ckpt/encode-s12k.pt +3 -0
ckpt/model_1rvq/model_2_fixed.safetensors +2 -2
ckpt/model_septoken/model_2.safetensors +2 -2
ckpt/models--lengyue233--content-vec-best/.no_exist/c0b9ba13db21beaa4053faae94c102ebe326fd68/model.safetensors +0 -0
ckpt/models--lengyue233--content-vec-best/.no_exist/c0b9ba13db21beaa4053faae94c102ebe326fd68/model.safetensors.index.json +0 -0
ckpt/models--lengyue233--content-vec-best/blobs/5186a71b15933aca2d9942db95e1aff02642d1f0 +71 -0
ckpt/models--lengyue233--content-vec-best/blobs/d8dd400e054ddf4e6be75dab5a2549db748cc99e756a097c496c099f65a4854e +3 -0
ckpt/models--lengyue233--content-vec-best/refs/main +1 -0
ckpt/models--lengyue233--content-vec-best/snapshots/c0b9ba13db21beaa4053faae94c102ebe326fd68/config.json +71 -0
ckpt/models--lengyue233--content-vec-best/snapshots/c0b9ba13db21beaa4053faae94c102ebe326fd68/pytorch_model.bin +3 -0
ckpt/{songgeneration_base → songgeneration_base_zh}/config.yaml +0 -33
ckpt/{songgeneration_base → songgeneration_base_zh}/model.pt +0 -0
img/logo.jpg +0 -0

README.md CHANGED Viewed

@@ -3,14 +3,13 @@ language:
 - en
 - zh
 pipeline_tag: text-to-audio
-library_name: tencent-song-generation
 ---
 # SongGeneration
-<p align="center"><img src="img/logo.jpg" width="40%"></p>
 <p align="center">
-    <a href="https://levo-demo.github.io/">Demo</a> &nbsp;|&nbsp; <a href="https://arxiv.org/abs/2506.07520">Paper</a>  &nbsp;|&nbsp; <a href="https://github.com/tencent-ailab/songgeneration">Code</a>  &nbsp;|&nbsp; <a href="https://huggingface.co/spaces/tencent/SongGeneration">Space Demo</a>
 </p>
@@ -18,15 +17,11 @@ This repository is the official weight repository for LeVo: High-Quality Song Ge
 ## Model Versions
-| Model                     | Max Length |       Language       | GPU Memory  | RFT(A100) | Download Link                                                |
-| ------------------------- | :--------: | :------------------: | :---------: | :-------: | ------------------------------------------------------------ |
-| SongGeneration-base       |   2m30s    |          zh          |   10G/16G   |   1.26    | You were here |
-| SongGeneration-base-new   |   2m30s    |        zh, en        |   10G/16G   |   1.26    | [Huggingface](https://huggingface.co/lglg666/SongGeneration-base-new) |
-| SongGeneration-base-full  |   4m30s    |        zh, en        |   12G/18G   |   1.30    | [Huggingface](https://huggingface.co/lglg666/SongGeneration-base-full) |
-| SongGeneration-large      |   4m30s    |        zh, en        |   22G/28G   |   1.51    | [Huggingface](https://huggingface.co/lglg666/SongGeneration-large) |
-| SongGeneration-v1.5-small |     2m     | zh, en, es, ja, etc. |      -      |     -     | Coming soon                                                  |
-| SongGeneration-v1.5-base  |   4m30s    | zh, en, es, ja, etc. |      -      |     -     | Coming soon                                                  |
-| SongGeneration-v1.5-large |   4m30s    | zh, en, es, ja, etc. |      -      |     -     | Coming soon                                                  |
 ## Overview
@@ -36,4 +31,4 @@ We develop the SongGeneration model. It is an LM-based framework consisting of *
 ## License
-The code and weights in this repository are released under the terms of the [LICENSE](LICENSE) file.

 - en
 - zh
 pipeline_tag: text-to-audio
+library_name: transformers
 ---
 # SongGeneration
 <p align="center">
+    <a href="https://levo-demo.github.io/">Demo</a> &nbsp;|&nbsp; <a href="https://arxiv.org/abs/2506.07520">Paper</a>  &nbsp;|&nbsp; <a href="https://github.com/tencent-ailab/songgeneration">Code</a>  &nbsp;|&nbsp; <a href="https://huggingface.co/spaces/waytan22/SongGeneration-LeVo">Space Demo</a>
 </p>
 ## Model Versions
+|          Model           |                         HuggingFace                          |
+| :----------------------: | :----------------------------------------------------------: |
+|  SongGeneration-base(zh)   | <a href="https://huggingface.co/tencent/SongGeneration/tree/main/ckpt/songgeneration_base_zh">v20250520</a> |
+| SongGeneration-base(zh&en) |                         Coming soon                          |
+| SongGeneration-full(zh&en) |                         Coming soon                          |
 ## Overview
 ## License
+The code and weights in this repository are released under the terms of the [LICENSE](LICENSE) file.

ckpt/encode-s12k.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e250df56b035f74c1f66f15133f4c78f664d70fa0b09aa9a752b7871bb58c02f
+size 3957949089

ckpt/model_1rvq/model_2_fixed.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cfbc5d4f0057921f64bccb93431fa5820be8cae326d913ad383cac1f61b8052f
-size 659473962

 version https://git-lfs.github.com/spec/v1
+oid sha256:339a16956b859a82defc02bfd32c3744d11ff942065f6ec9306dfd4400d62110
+size 4704507596

ckpt/model_septoken/model_2.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a82451aeba6e171b47c6be9991698e46cf859eb3f17bfbedc17332341bd86e4
-size 3768119184

 version https://git-lfs.github.com/spec/v1
+oid sha256:758aa342942a7b7c0ae179af1a952e0b944e39128ea816741499b3031113aaee
+size 4808167708

ckpt/models--lengyue233--content-vec-best/.no_exist/c0b9ba13db21beaa4053faae94c102ebe326fd68/model.safetensors ADDED Viewed

File without changes

ckpt/models--lengyue233--content-vec-best/.no_exist/c0b9ba13db21beaa4053faae94c102ebe326fd68/model.safetensors.index.json ADDED Viewed

File without changes

ckpt/models--lengyue233--content-vec-best/blobs/5186a71b15933aca2d9942db95e1aff02642d1f0 ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "HubertModelWithFinalProj"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "sum",
+  "ctc_zero_infinity": false,
+  "do_stable_layer_norm": false,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "feat_proj_layer_norm": true,
+  "final_dropout": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "hubert",
+  "num_attention_heads": 12,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "torch_dtype": "float32",
+  "transformers_version": "4.27.3",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 32
+}

ckpt/models--lengyue233--content-vec-best/blobs/d8dd400e054ddf4e6be75dab5a2549db748cc99e756a097c496c099f65a4854e ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8dd400e054ddf4e6be75dab5a2549db748cc99e756a097c496c099f65a4854e
+size 378342945

ckpt/models--lengyue233--content-vec-best/refs/main ADDED Viewed

	@@ -0,0 +1 @@


1	+ c0b9ba13db21beaa4053faae94c102ebe326fd68

ckpt/models--lengyue233--content-vec-best/snapshots/c0b9ba13db21beaa4053faae94c102ebe326fd68/config.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "HubertModelWithFinalProj"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "sum",
+  "ctc_zero_infinity": false,
+  "do_stable_layer_norm": false,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
+  "feat_proj_layer_norm": true,
+  "final_dropout": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "hubert",
+  "num_attention_heads": 12,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "torch_dtype": "float32",
+  "transformers_version": "4.27.3",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 32
+}

ckpt/models--lengyue233--content-vec-best/snapshots/c0b9ba13db21beaa4053faae94c102ebe326fd68/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8dd400e054ddf4e6be75dab5a2549db748cc99e756a097c496c099f65a4854e
+size 378342945

ckpt/{songgeneration_base → songgeneration_base_zh}/config.yaml RENAMED Viewed

@@ -106,36 +106,3 @@ conditioners:
     QwTextTokenizer:
       token_path: third_party/Qwen2-7B
       max_len: 50
-offload:
-  audiolm:
-    offload_module: self
-    cpu_mem_gb: 0
-    pre_copy_step: 1
-    clean_cache_after_forward: false
-    dtype: torch.float16
-    offload_layer_dict:
-      transformer: 4
-      transformer2: 4
-    ignore_layer_list: []
-    clean_cache_wrapper:
-      module: self
-      method_name: _sample_next_token
-      diff_mem_gb_thre: 2
-    debug: false
-  wav_tokenizer_diffusion:
-    offload_module: self.model.model
-    pre_copy_step: 1
-    clean_cache_after_forward: false
-    cpu_mem_gb: -1
-    dtype: null
-    offload_layer_dict:
-      cfm_wrapper: 5
-      hubert: 4
-    ignore_layer_list: []
-    clean_cache_wrapper:
-      module: self.model.model.cfm_wrapper.estimator
-      method_name: forward
-      diff_mem_gb_thre: 1
-    debug: false

     QwTextTokenizer:
       token_path: third_party/Qwen2-7B
       max_len: 50

ckpt/{songgeneration_base → songgeneration_base_zh}/model.pt RENAMED Viewed

File without changes

img/logo.jpg DELETED Viewed

Binary file (70.4 kB)