update

- NOTICE +229 -1
- README.md +4 -4
- assets/logo.jpg +0 -0
- modeling_qwen.py +62 -69
- tokenizer_config.json +1 -0
    	
NOTICE CHANGED

@@ -49,4 +49,232 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.
+
+------------- LICENSE FOR stanford_alpaca code  --------------
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+------------- LICENSE FOR PanQiWei AutoGPTQ code  --------------
+
+MIT License
+
+Copyright (c) 2023 潘其威(William)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
    	
README.md CHANGED

@@ -16,9 +16,9 @@ inference: false
 <br>

 <p align="center">
-        🤗 <a href="https://huggingface.co/Qwen">Hugging Face</a>   |   🤖 <a href="https://modelscope.cn/organization/qwen">ModelScope</a>   |    📑 <a href="https://arxiv.org/abs/2309.16609">Paper</a
+        🤗 <a href="https://huggingface.co/Qwen">Hugging Face</a>   |   🤖 <a href="https://modelscope.cn/organization/qwen">ModelScope</a>   |    📑 <a href="https://arxiv.org/abs/2309.16609">Paper</a>    |   🖥️ <a href="https://modelscope.cn/studios/qwen/Qwen-14B-Chat-Demo/summary">Demo</a>
 <br>
-<a href="
+<a href="assets/wechat.png">WeChat (微信)</a>   |   <a href="https://discord.gg/z3GAxXZ9Ce">Discord</a>   |    <a href="https://dashscope.aliyun.com">API</a> 
 </p>
 <br>

@@ -597,9 +597,9 @@ If you find our work helpful, feel free to give us a cite.

 ## 使用协议(License Agreement)

-我们的代码和模型权重对学术研究完全开放,并支持商用。请查看[LICENSE](https://github.com/QwenLM/Qwen/blob/main/
+我们的代码和模型权重对学术研究完全开放,并支持商用。请查看[LICENSE](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT)了解具体的开源协议细节。如需商用,请填写[问卷](https://dashscope.console.aliyun.com/openModelApply/Qwen-14B-Chat)申请。

-Our code and checkpoints are open to research purpose, and they are allowed for commercial purposes. Check [LICENSE](https://github.com/QwenLM/Qwen/blob/main/
+Our code and checkpoints are open to research purpose, and they are allowed for commercial purposes. Check [LICENSE](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) for more details about the license. If you have requirements for commercial use, please fill out the [form](https://dashscope.console.aliyun.com/openModelApply/Qwen-14B-Chat) to apply.
 <br>

    	
assets/logo.jpg CHANGED

modeling_qwen.py CHANGED

@@ -13,7 +13,6 @@ import torch
 import torch.nn.functional as F
 import torch.utils.checkpoint
 import warnings
-from torch.cuda.amp import autocast

 from torch.nn import CrossEntropyLoss
 from transformers import PreTrainedTokenizer, GenerationConfig, StoppingCriteriaList
@@ -79,9 +78,10 @@ We detect you have activated flash attention support, but running model computat
 apply_rotary_emb_func = None
 rms_norm = None
 flash_attn_unpadded_func = None
+flash_attn_func = None

 def _import_flash_attn():
-    global apply_rotary_emb_func, rms_norm, flash_attn_unpadded_func
+    global apply_rotary_emb_func, rms_norm, flash_attn_unpadded_func, flash_attn_func
     try:
         from flash_attn.layers.rotary import apply_rotary_emb_func as __apply_rotary_emb_func
         apply_rotary_emb_func = __apply_rotary_emb_func
@@ -102,14 +102,18 @@ def _import_flash_attn():

     try:
         import flash_attn
+        _flash_attn_func = None
         if not hasattr(flash_attn, '__version__'):
             from flash_attn.flash_attn_interface import flash_attn_unpadded_func as __flash_attn_unpadded_func
         else:
             if int(flash_attn.__version__.split(".")[0]) >= 2:
+                if int(flash_attn.__version__.split(".")[1]) >= 1:
+                    from flash_attn.flash_attn_interface import flash_attn_func as _flash_attn_func
                 from flash_attn.flash_attn_interface import flash_attn_varlen_func as __flash_attn_unpadded_func
             else:
                 from flash_attn.flash_attn_interface import flash_attn_unpadded_func as __flash_attn_unpadded_func
         flash_attn_unpadded_func = __flash_attn_unpadded_func
+        flash_attn_func = _flash_attn_func
     except ImportError:
         logger.warn(
             "Warning: import flash_attn fail, please install FlashAttention to get higher efficiency "
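
Note: the version gate above reduces to this: flash-attn 1.x exposes flash_attn_unpadded_func, 2.x renames it to flash_attn_varlen_func, and 2.1+ additionally ships the fixed-length flash_attn_func that the new fast path uses. A standalone sketch of the same logic (only the flash_attn imports are real; the condensed control flow is an illustration, not the committed code):

flash_attn_func = None
flash_attn_unpadded_func = None
try:
    import flash_attn
    if not hasattr(flash_attn, "__version__"):
        from flash_attn.flash_attn_interface import flash_attn_unpadded_func
    else:
        major, minor = (int(x) for x in flash_attn.__version__.split(".")[:2])
        if major >= 2:
            # v2 renamed the unpadded (variable-length) kernel
            from flash_attn.flash_attn_interface import flash_attn_varlen_func as flash_attn_unpadded_func
            if minor >= 1:
                # fixed-length entry point used by the batch_size == 1 fast path
                from flash_attn.flash_attn_interface import flash_attn_func
        else:
            from flash_attn.flash_attn_interface import flash_attn_unpadded_func
except ImportError:
    pass  # fall back to the pure-PyTorch attention paths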
@@ -182,6 +186,11 @@ class FlashSelfAttention(torch.nn.Module):
         seqlen_k = k.shape[1]
         seqlen_out = seqlen_q

+        if flash_attn_func is not None and batch_size == 1:
+            dropout_p = self.dropout_p if self.training else 0
+            output = flash_attn_func(q, k, v, dropout_p, softmax_scale=self.softmax_scale, causal=self.causal)
+            return output
+
         q, k, v = [rearrange(x, "b s ... -> (b s) ...") for x in [q, k, v]]
         cu_seqlens_q = torch.arange(
             0,
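
The new fast path skips the (b s) packing and cu_seqlens bookkeeping that flash_attn_varlen_func requires. For orientation, a plain-PyTorch reference of what that batch-size-1 call computes, assuming a single unpadded causal sequence (reference_attention is illustrative, not part of the commit):

import torch

def reference_attention(q, k, v, softmax_scale=None, causal=True):
    # q, k, v: (batch, seqlen, n_heads, head_dim)
    scale = softmax_scale if softmax_scale is not None else q.size(-1) ** -0.5
    q, k, v = (x.transpose(1, 2) for x in (q, k, v))  # -> (b, h, s, d)
    scores = torch.matmul(q, k.transpose(-2, -1)) * scale
    if causal:
        s = scores.size(-1)
        keep = torch.ones(s, s, dtype=torch.bool, device=scores.device).tril()
        scores = scores.masked_fill(~keep, torch.finfo(scores.dtype).min)
    out = torch.matmul(torch.softmax(scores, dim=-1), v)
    return out.transpose(1, 2)  # back to (b, s, h, d)

q = k = v = torch.randn(1, 8, 2, 16)
print(reference_attention(q, k, v).shape)  # torch.Size([1, 8, 2, 16])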
@@ -311,7 +320,7 @@ class QWenAttention(nn.Module):
                 warnings.warn("Failed to import KV cache kernels.")
                 self.cache_kernels = None

-    def _attn(self, query, key, value, registered_causal_mask, attention_mask=None, head_mask=None):
+    def _attn(self, query, key, value, causal_mask=None, attention_mask=None, head_mask=None):
         device = query.device
         if self.use_cache_quantization:
             qk, qk_scale, qk_zero = key
@@ -336,26 +345,13 @@ class QWenAttention(nn.Module):
                 size_temp = value[0].size(-1)
             else:
                 size_temp = value.size(-1)
-            attn_weights = attn_weights / torch.full(
-                [],
-                size_temp ** 0.5,
-                dtype=attn_weights.dtype,
-                device=attn_weights.device,
-            )
-        if self.use_cache_quantization:
-            query_length, key_length = query.size(-2), key[0].size(-2)
-        else:
-            query_length, key_length = query.size(-2), key.size(-2)
-        causal_mask = registered_causal_mask[
-            :, :, key_length - query_length : key_length, :key_length
-        ]
+            attn_weights = attn_weights / (size_temp ** 0.5)
+
         mask_value = torch.finfo(attn_weights.dtype).min
-        mask_value = torch.full([], mask_value, dtype=attn_weights.dtype).to(
-            attn_weights.device
-        )
-        attn_weights = torch.where(
-            causal_mask, attn_weights.to(attn_weights.dtype), mask_value
-        )
+        if causal_mask is not None:
+            attn_weights = torch.where(
+                causal_mask, attn_weights.to(attn_weights.dtype), mask_value
+            )

         if attention_mask is not None:
             attn_weights = attn_weights + attention_mask
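
The rewritten _attn divides by sqrt(head_dim) directly and applies the causal mask only when one is passed in (prefill); during single-token decode causal_mask is None and the step is skipped. The masking arithmetic in isolation (shapes illustrative):

import torch

attn_weights = torch.randn(1, 2, 4, 4)  # (batch, heads, q_len, k_len)
size_temp = 16                          # head_dim
attn_weights = attn_weights / (size_temp ** 0.5)

causal_mask = torch.tril(torch.ones(4, 4, dtype=torch.bool)).view(1, 1, 4, 4)
mask_value = torch.finfo(attn_weights.dtype).min
if causal_mask is not None:  # None during single-token decode
    attn_weights = torch.where(
        causal_mask, attn_weights, torch.full_like(attn_weights, mask_value)
    )
print(attn_weights[0, 0])  # upper triangle now holds the dtype minimum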
@@ -482,7 +478,8 @@ class QWenAttention(nn.Module):
         else:
             present = None

-        if self.use_logn_attn and not self.training:
+        key_size = key[0].size(2) if self.use_cache_quantization else key.size(1)
+        if key_size > self.seq_length and self.use_logn_attn and not self.training:
             if self.use_cache_quantization:
                 seq_start = key[0].size(2) - query.size(1)
                 seq_end = key[0].size(2)
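
The new gate applies log-n attention scaling only once the cached key length actually exceeds the trained context window seq_length. A sketch of the factor involved, assuming the usual definition (log base seq_length of the position, clamped to 1 inside the window; values illustrative):

import math
import torch

seq_length = 2048  # trained context window (assumed)
key_size = 4096    # current KV-cache length
if key_size > seq_length:
    positions = torch.arange(1, key_size + 1, dtype=torch.float32)
    logn = torch.where(
        positions > seq_length,
        torch.log(positions) / math.log(seq_length),  # log_{seq_length}(pos)
        torch.ones_like(positions),
    )
    print(logn[seq_length - 1].item(), logn[-1].item())  # 1.0 at the edge, ~1.09 at 4096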
@@ -501,15 +498,19 @@ class QWenAttention(nn.Module):
             q, k, v = query, key, value
             attn_output = self.core_attention_flash(q, k, v, attention_mask=attention_mask)
         else:
-            registered_causal_mask = torch.tril(
-                torch.ones((key.size(1), key.size(1)), dtype=torch.bool, device=key.device)
-            ).view(1, 1, key.size(1), key.size(1))
+            key_size = key[0].size(2) if self.use_cache_quantization else key.size(1)
+            if query.size(1) == key_size:
+                causal_mask = torch.tril(
+                    torch.ones((key_size, key_size), dtype=torch.bool, device=query.device)
+                ).view(1, 1, key_size, key_size)
+            else:
+                causal_mask = None
             query = query.permute(0, 2, 1, 3)
             if not self.use_cache_quantization:
                 key = key.permute(0, 2, 1, 3)
                 value = value.permute(0, 2, 1, 3)
             if (
-                registered_causal_mask is None
+                causal_mask is None
                 and self.use_flash_attn
                 and flash_attn_unpadded_func is not None
                 and not self.is_fp32
@@ -518,13 +519,12 @@ class QWenAttention(nn.Module):
                 raise Exception(_ERROR_INPUT_CPU_QUERY_WITH_FLASH_ATTN_ACTIVATED)

             if not self.use_cache_quantization and SUPPORT_TORCH2:
-                causal_mask = registered_causal_mask[
-                    :, :, key.size(-2) - query.size(-2): key.size(-2), :key.size(-2)
-                ]
                 if attention_mask is not None:
                     attention_mask = attention_mask.expand(
                         -1, -1, causal_mask.size(2), -1
-                    ).masked_fill(~causal_mask, torch.finfo(query.dtype).min)
+                    )
+                    if causal_mask is not None:
+                        attention_mask.masked_fill(~causal_mask, torch.finfo(query.dtype).min)
                 else:
                     attention_mask = causal_mask
                 attn_output = F.scaled_dot_product_attention(
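
With SUPPORT_TORCH2 the non-flash path now routes through F.scaled_dot_product_attention, passing either the boolean causal mask alone or a padding mask expanded and stamped with the dtype minimum. Both cases, stated runnably (note this sketch assigns the masked_fill result, which the out-of-place masked_fill call shown in the hunk does not):

import torch
import torch.nn.functional as F

q = k = v = torch.randn(1, 2, 8, 16)  # (batch, heads, seqlen, head_dim)
causal_mask = torch.tril(torch.ones(8, 8, dtype=torch.bool)).view(1, 1, 8, 8)

# no padding mask: hand SDPA the boolean causal mask directly
out = F.scaled_dot_product_attention(q, k, v, attn_mask=causal_mask)

# padding mask present: expand it, then stamp in the causal structure
attention_mask = torch.zeros(1, 1, 1, 8)  # additive mask, 0.0 = attend
attention_mask = attention_mask.expand(-1, -1, causal_mask.size(2), -1)
attention_mask = attention_mask.masked_fill(~causal_mask, torch.finfo(q.dtype).min)
out = F.scaled_dot_product_attention(q, k, v, attn_mask=attention_mask)
print(out.shape)  # torch.Size([1, 2, 8, 16])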
@@ -533,7 +533,7 @@ class QWenAttention(nn.Module):
                 attn_weight = None
             else:
                 attn_output, attn_weight = self._attn(
-                    query, key, value, registered_causal_mask, attention_mask, head_mask
+                    query, key, value, causal_mask, attention_mask, head_mask
                 )
         context_layer = self._merge_heads(
             attn_output, self.num_heads, self.head_dim
@@ -549,6 +549,8 @@ class QWenAttention(nn.Module):
                 and not self.is_fp32
             ):
                 raise ValueError("Cannot output attentions while using flash-attn")
+            elif not self.use_cache_quantization and SUPPORT_TORCH2:
+                raise ValueError("Cannot output attentions while using scaled_dot_product_attention")
             else:
                 outputs += (attn_weight,)

@@ -574,6 +576,7 @@ class QWenMLP(nn.Module):
         output = self.c_proj(intermediate_parallel)
         return output

+
 class QWenBlock(nn.Module):
     def __init__(self, config):
         super().__init__()
@@ -642,6 +645,7 @@ class QWenPreTrainedModel(PreTrainedModel):
     is_parallelizable = False
     supports_gradient_checkpointing = True
     _no_split_modules = ["QWenBlock"]
+    _skip_keys_device_placement = "past_key_values"

     def __init__(self, *inputs, **kwargs):
         super().__init__(*inputs, **kwargs)
@@ -933,11 +937,6 @@ class QWenLMHeadModel(QWenPreTrainedModel):
         assert (
             config.bf16 + config.fp16 + config.fp32 <= 1
         ), "Only one of \"bf16\", \"fp16\", \"fp32\" can be true"
-        logger.warn(
-            "Warning: please make sure that you are using the latest codes and checkpoints, "
-            "especially if you used Qwen-7B before 09.25.2023."
-            "请使用最新模型和代码,尤其如果你在9月25日前已经开始使用Qwen-7B,千万注意不要使用错误代码和模型。"
-        )

         autoset_precision = config.bf16 + config.fp16 + config.fp32 == 0

@@ -990,7 +989,6 @@ class QWenLMHeadModel(QWenPreTrainedModel):
             self.lm_head.half()
         self.post_init()

-
     def get_output_embeddings(self):
         return self.lm_head

@@ -1000,22 +998,13 @@ class QWenLMHeadModel(QWenPreTrainedModel):
     def prepare_inputs_for_generation(
         self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs
     ):
-        token_type_ids = kwargs.get("token_type_ids", None)
         if past_key_values:
             input_ids = input_ids[:, -1].unsqueeze(-1)
-            if token_type_ids is not None:
-                token_type_ids = token_type_ids[:, -1].unsqueeze(-1)

-        attention_mask = kwargs.get("attention_mask", None)
-        position_ids = kwargs.get("position_ids", None)
-
-        if attention_mask is not None and position_ids is None:
-            position_ids = attention_mask.long().cumsum(-1) - 1
-            position_ids.masked_fill_(attention_mask == 0, 1)
-            if past_key_values:
-                position_ids = position_ids[:, -1].unsqueeze(-1)
+        if input_ids.size(0) == 1:
+            attention_mask = None
         else:
-            position_ids = None
+            attention_mask = kwargs.get("attention_mask", None)

         if inputs_embeds is not None and past_key_values is None:
             model_inputs = {"inputs_embeds": inputs_embeds}
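
The rewritten prepare_inputs_for_generation drops the position_ids/token_type_ids handling entirely and passes attention_mask only for batched input: a single unpadded sequence has no padding positions to hide, so causal-only attention already gives the same result. A quick check of that equivalence (illustrative):

import torch
import torch.nn.functional as F

q = k = v = torch.randn(1, 2, 6, 8)
causal = torch.tril(torch.ones(6, 6, dtype=torch.bool))
a = F.scaled_dot_product_attention(q, k, v, attn_mask=causal)
b = F.scaled_dot_product_attention(q, k, v, is_causal=True)
print(torch.allclose(a, b, atol=1e-6))  # True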
@@ -1026,9 +1015,7 @@ class QWenLMHeadModel(QWenPreTrainedModel):
             {
                 "past_key_values": past_key_values,
                 "use_cache": kwargs.get("use_cache"),
-                "position_ids": position_ids,
                 "attention_mask": attention_mask,
-                "token_type_ids": token_type_ids,
             }
         )
         return model_inputs
@@ -1299,8 +1286,7 @@ class RotaryEmbedding(torch.nn.Module):
         self._ntk_alpha_cached = 1.0
         self._ntk_alpha_cached_list = [1.0]

-    def update_rotary_pos_emb_cache(self, max_seq_len, offset=0, ntk_alpha=1.0):
-        seqlen = max_seq_len + offset
+    def update_rotary_pos_emb_cache(self, seqlen, ntk_alpha=1.0):
         if seqlen > self._seq_len_cached or ntk_alpha != self._ntk_alpha_cached:
             base = self.base * ntk_alpha ** (self.dim / (self.dim - 2))
             self.inv_freq = 1.0 / (
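
update_rotary_pos_emb_cache keeps the NTK-aware scaling: raising the RoPE base by ntk_alpha ** (dim / (dim - 2)) stretches all wavelengths so longer contexts fit without retraining. The cache math in isolation (dimensions illustrative):

import torch

dim, base, ntk_alpha, seqlen = 128, 10000.0, 2.0, 16
base = base * ntk_alpha ** (dim / (dim - 2))  # stretch RoPE wavelengths
inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
freqs = torch.outer(torch.arange(seqlen, dtype=inv_freq.dtype), inv_freq)
emb = torch.cat((freqs, freqs), dim=-1)       # (seqlen, dim)
cos, sin = emb.cos(), emb.sin()
print(cos.shape)  # torch.Size([16, 128])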
@@ -1323,10 +1309,10 @@ class RotaryEmbedding(torch.nn.Module):
         cos, sin = emb.cos(), emb.sin()
         self._rotary_pos_emb_cache = [cos, sin]

-    def forward(self, max_seq_len, offset=0, ntk_alpha=1.0):
-        self.update_rotary_pos_emb_cache(max_seq_len, offset, ntk_alpha)
+    def forward(self, max_seq_len, ntk_alpha=1.0):
+        self.update_rotary_pos_emb_cache(max_seq_len, ntk_alpha)
         cos, sin = self._rotary_pos_emb_cache
-        return [cos[:, offset : offset + max_seq_len], sin[:, offset : offset + max_seq_len]]
+        return [cos[:, :max_seq_len], sin[:, :max_seq_len]]


 def _rotate_half(x):
@@ -1338,21 +1324,28 @@ def _rotate_half(x):


 def apply_rotary_pos_emb(t, freqs):
+    """ Apply rotary embedding to the first rotary_dim of the input
+
+    Arguments:
+      t (tensor(batch_size, seq_len, n_head, head_dim)):
+        the input embedding/hidden states
+      freqs (list[tensor(1, seq_len, 1, rotary_dim), tensor(1, seq_len, 1, rotary_dim)]):
+        the cached cos/sin position embeddings
+    """
+    rot_dim = freqs[0].shape[-1]
     cos, sin = freqs
+    t_float = t.float()
     if apply_rotary_emb_func is not None and t.is_cuda:
-        t_ = t.float()
-        cos = cos.squeeze(0).squeeze(1)[:, : cos.shape[-1] // 2]
-        sin = sin.squeeze(0).squeeze(1)[:, : sin.shape[-1] // 2]
-        output = apply_rotary_emb_func(t_, cos, sin).type_as(t)
-        return output
+        # apply_rotary_emb in flash_attn requires cos/sin to be of
+        # shape (seqlen, rotary_dim / 2) and apply rotary embedding
+        # to the first rotary_dim of the input
+        cos = cos.squeeze(0).squeeze(1)[:, : rot_dim // 2]
+        sin = sin.squeeze(0).squeeze(1)[:, : rot_dim // 2]
+        return apply_rotary_emb_func(t_float, cos, sin).type_as(t)
     else:
-        rot_dim = freqs[0].shape[-1]
-        cos, sin = freqs
-        t_, t_pass_ = t[..., :rot_dim], t[..., rot_dim:]
-        t_ = t_.float()
-        t_pass_ = t_pass_.float()
-        t_ = (t_ * cos) + (_rotate_half(t_) * sin)
-        return torch.cat((t_, t_pass_), dim=-1).type_as(t)
+        t_rot, t_pass = t_float[..., :rot_dim], t_float[..., rot_dim:]
+        t_rot = (t_rot * cos) + (_rotate_half(t_rot) * sin)
+        return torch.cat((t_rot, t_pass), dim=-1).type_as(t)


 class RMSNorm(torch.nn.Module):
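
The rewritten apply_rotary_pos_emb rotates only the first rot_dim channels in float32 and passes the remainder through untouched. The fallback branch in isolation (a sketch; this rotate_half uses chunk, which is equivalent to the rearrange-based _rotate_half helper above):

import torch

def rotate_half(x):
    # split the last dim in half, then swap-and-negate
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

def apply_rotary(t, cos, sin):
    rot_dim = cos.shape[-1]
    t_float = t.float()
    t_rot, t_pass = t_float[..., :rot_dim], t_float[..., rot_dim:]
    t_rot = (t_rot * cos) + (rotate_half(t_rot) * sin)
    return torch.cat((t_rot, t_pass), dim=-1).type_as(t)

t = torch.randn(1, 8, 2, 64)    # (batch, seq, n_head, head_dim)
cos = torch.randn(1, 8, 1, 32)  # cached cos/sin over the first rot_dim=32
sin = torch.randn(1, 8, 1, 32)
print(apply_rotary(t, cos, sin).shape)  # torch.Size([1, 8, 2, 64])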
|  | |
| 13 | 
             
            import torch.nn.functional as F
         | 
| 14 | 
             
            import torch.utils.checkpoint
         | 
| 15 | 
             
            import warnings
         | 
|  | |
| 16 |  | 
| 17 | 
             
            from torch.nn import CrossEntropyLoss
         | 
| 18 | 
             
            from transformers import PreTrainedTokenizer, GenerationConfig, StoppingCriteriaList
         | 
|  | |
| 78 | 
             
            apply_rotary_emb_func = None
         | 
| 79 | 
             
            rms_norm = None
         | 
| 80 | 
             
            flash_attn_unpadded_func = None
         | 
| 81 | 
            +
            flash_attn_func = None
         | 
| 82 |  | 
| 83 | 
             
            def _import_flash_attn():
         | 
| 84 | 
            +
                global apply_rotary_emb_func, rms_norm, flash_attn_unpadded_func, flash_attn_func
         | 
| 85 | 
             
                try:
         | 
| 86 | 
             
                    from flash_attn.layers.rotary import apply_rotary_emb_func as __apply_rotary_emb_func
         | 
| 87 | 
             
                    apply_rotary_emb_func = __apply_rotary_emb_func
         | 
|  | |
| 102 |  | 
| 103 | 
             
                try:
         | 
| 104 | 
             
                    import flash_attn
         | 
| 105 | 
            +
                    _flash_attn_func = None
         | 
| 106 | 
             
                    if not hasattr(flash_attn, '__version__'):
         | 
| 107 | 
             
                        from flash_attn.flash_attn_interface import flash_attn_unpadded_func as __flash_attn_unpadded_func
         | 
| 108 | 
             
                    else:
         | 
| 109 | 
             
                        if int(flash_attn.__version__.split(".")[0]) >= 2:
         | 
| 110 | 
            +
                            if int(flash_attn.__version__.split(".")[1]) >= 1:
         | 
| 111 | 
            +
                                from flash_attn.flash_attn_interface import flash_attn_func as _flash_attn_func
         | 
| 112 | 
             
                            from flash_attn.flash_attn_interface import flash_attn_varlen_func as __flash_attn_unpadded_func
         | 
| 113 | 
             
                        else:
         | 
| 114 | 
             
                            from flash_attn.flash_attn_interface import flash_attn_unpadded_func as __flash_attn_unpadded_func
         | 
| 115 | 
             
                    flash_attn_unpadded_func = __flash_attn_unpadded_func
         | 
| 116 | 
            +
                    flash_attn_func = _flash_attn_func
         | 
| 117 | 
             
                except ImportError:
         | 
| 118 | 
             
                    logger.warn(
         | 
| 119 | 
             
                        "Warning: import flash_attn fail, please install FlashAttention to get higher efficiency "
         | 
|  | |
| 186 | 
             
                    seqlen_k = k.shape[1]
         | 
| 187 | 
             
                    seqlen_out = seqlen_q
         | 
| 188 |  | 
| 189 | 
            +
                    if flash_attn_func is not None and batch_size == 1:
         | 
| 190 | 
            +
                        dropout_p = self.dropout_p if self.training else 0
         | 
| 191 | 
            +
                        output = flash_attn_func(q, k, v, dropout_p, softmax_scale=self.softmax_scale, causal=self.causal)
         | 
| 192 | 
            +
                        return output
         | 
| 193 | 
            +
             | 
| 194 | 
             
                    q, k, v = [rearrange(x, "b s ... -> (b s) ...") for x in [q, k, v]]
         | 
| 195 | 
             
                    cu_seqlens_q = torch.arange(
         | 
| 196 | 
             
                        0,
         | 
|  | |
| 320 | 
             
                                warnings.warn("Failed to import KV cache kernels.")
         | 
| 321 | 
             
                                self.cache_kernels = None
         | 
| 322 |  | 
| 323 | 
            +
                def _attn(self, query, key, value, causal_mask=None, attention_mask=None, head_mask=None):
         | 
| 324 | 
             
                    device = query.device
         | 
| 325 | 
             
                    if self.use_cache_quantization:
         | 
| 326 | 
             
                        qk, qk_scale, qk_zero = key
         | 
|  | |
| 345 | 
             
                            size_temp = value[0].size(-1)
         | 
| 346 | 
             
                        else:
         | 
| 347 | 
             
                            size_temp = value.size(-1)
         | 
| 348 | 
            +
                        attn_weights = attn_weights / (size_temp ** 0.5)
         | 
| 349 | 
            +
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 350 | 
             
                    mask_value = torch.finfo(attn_weights.dtype).min
         | 
| 351 | 
            +
                    if causal_mask is not None:
         | 
| 352 | 
            +
                        attn_weights = torch.where(
         | 
| 353 | 
            +
                            causal_mask, attn_weights.to(attn_weights.dtype), mask_value
         | 
| 354 | 
            +
                        )
         | 
|  | |
|  | |
| 355 |  | 
| 356 | 
             
                    if attention_mask is not None:
         | 
| 357 | 
             
                        attn_weights = attn_weights + attention_mask
         | 
|  | |
| 478 | 
             
                    else:
         | 
| 479 | 
             
                        present = None
         | 
| 480 |  | 
| 481 | 
            +
                    key_size = key[0].size(2) if self.use_cache_quantization else key.size(1)
         | 
| 482 | 
            +
                    if key_size > self.seq_length and self.use_logn_attn and not self.training:
         | 
| 483 | 
             
                        if self.use_cache_quantization:
         | 
| 484 | 
             
                            seq_start = key[0].size(2) - query.size(1)
         | 
| 485 | 
             
                            seq_end = key[0].size(2)
         | 
|  | |
| 498 | 
             
                        q, k, v = query, key, value
         | 
| 499 | 
             
                        attn_output = self.core_attention_flash(q, k, v, attention_mask=attention_mask)
         | 
| 500 | 
             
                    else:
         | 
| 501 | 
            +
                        key_size = key[0].size(2) if self.use_cache_quantization else key.size(1)
         | 
| 502 | 
            +
                        if query.size(1) == key_size:
         | 
| 503 | 
            +
                            causal_mask = torch.tril(
         | 
| 504 | 
            +
                                torch.ones((key_size, key_size), dtype=torch.bool, device=query.device)
         | 
| 505 | 
            +
                            ).view(1, 1, key_size, key_size)
         | 
| 506 | 
            +
                        else:
         | 
| 507 | 
            +
                            causal_mask = None
         | 
| 508 | 
             
                        query = query.permute(0, 2, 1, 3)
         | 
| 509 | 
             
                        if not self.use_cache_quantization:
         | 
| 510 | 
             
                            key = key.permute(0, 2, 1, 3)
         | 
| 511 | 
             
                            value = value.permute(0, 2, 1, 3)
         | 
| 512 | 
             
                        if (
         | 
| 513 | 
            +
                            causal_mask is None
         | 
| 514 | 
             
                            and self.use_flash_attn
         | 
| 515 | 
             
                            and flash_attn_unpadded_func is not None
         | 
| 516 | 
             
                            and not self.is_fp32
         | 
|  | |
| 519 | 
             
                            raise Exception(_ERROR_INPUT_CPU_QUERY_WITH_FLASH_ATTN_ACTIVATED)
         | 
| 520 |  | 
| 521 | 
             
                        if not self.use_cache_quantization and SUPPORT_TORCH2:
         | 
|  | |
|  | |
|  | |
| 522 | 
             
                            if attention_mask is not None:
         | 
| 523 | 
             
                                attention_mask = attention_mask.expand(
         | 
| 524 | 
             
                                    -1, -1, causal_mask.size(2), -1
         | 
| 525 | 
            +
                                )
         | 
| 526 | 
            +
                                if causal_mask is not None:
         | 
| 527 | 
            +
                                    attention_mask.masked_fill(~causal_mask, torch.finfo(query.dtype).min)
         | 
| 528 | 
             
                            else:
         | 
| 529 | 
             
                                attention_mask = causal_mask
         | 
| 530 | 
             
                            attn_output = F.scaled_dot_product_attention(
         | 
|  | |
| 533 | 
             
                            attn_weight = None
         | 
| 534 | 
             
                        else:
         | 
| 535 | 
             
                            attn_output, attn_weight = self._attn(
         | 
| 536 | 
            +
                                query, key, value, causal_mask, attention_mask, head_mask
         | 
| 537 | 
             
                            )
         | 
| 538 | 
             
                    context_layer = self._merge_heads(
         | 
| 539 | 
             
                        attn_output, self.num_heads, self.head_dim
         | 
|  | |
| 549 | 
             
                            and not self.is_fp32
         | 
| 550 | 
             
                        ):
         | 
| 551 | 
             
                            raise ValueError("Cannot output attentions while using flash-attn")
         | 
| 552 | 
            +
                        elif not self.use_cache_quantization and SUPPORT_TORCH2:
         | 
| 553 | 
            +
                            raise ValueError("Cannot output attentions while using scaled_dot_product_attention")
         | 
| 554 | 
             
                        else:
         | 
| 555 | 
             
                            outputs += (attn_weight,)
         | 
| 556 |  | 
|  | |
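The two ValueError branches exist because the fused kernels (flash-attn and scaled_dot_product_attention) never materialize the full attention matrix, so there are no weights to return; only the unfused _attn path can provide them. A sketch of the unfused math that exposes the weights:

    import math
    import torch

    q = torch.randn(1, 2, 4, 8)  # (batch, heads, seq_q, head_dim)
    k = torch.randn(1, 2, 4, 8)
    v = torch.randn(1, 2, 4, 8)

    # The (seq_q, seq_k) weight matrix is exactly what fused kernels avoid storing.
    attn_weight = torch.softmax(q @ k.transpose(-2, -1) / math.sqrt(q.size(-1)), dim=-1)
    attn_output = attn_weight @ v
    print(attn_weight.shape, attn_output.shape)
    # torch.Size([1, 2, 4, 4]) torch.Size([1, 2, 4, 8])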
| 576 |          output = self.c_proj(intermediate_parallel)
| 577 |          return output
| 578 | 
| 579 | +
| 580 |  class QWenBlock(nn.Module):
| 581 |      def __init__(self, config):
| 582 |          super().__init__()
| ... |
| 645 |      is_parallelizable = False
| 646 |      supports_gradient_checkpointing = True
| 647 |      _no_split_modules = ["QWenBlock"]
| 648 | +    _skip_keys_device_placement = "past_key_values"
| 649 | 
| 650 |      def __init__(self, *inputs, **kwargs):
| 651 |          super().__init__(*inputs, **kwargs)
| ... |
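The new _skip_keys_device_placement attribute follows a transformers/accelerate convention: when a model is dispatched with a device_map, forward arguments named there (here the past_key_values cache) are left on whatever device they already occupy instead of being moved. A minimal sketch of how a subclass declares these flags (the class and module names are illustrative, not from this repo):

    from transformers import PreTrainedModel

    class MyPreTrainedModel(PreTrainedModel):
        is_parallelizable = False
        supports_gradient_checkpointing = True
        _no_split_modules = ["MyBlock"]                  # never shard this module across devices
        _skip_keys_device_placement = "past_key_values"  # leave the KV cache where it is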
| 937 |          assert (
| 938 |              config.bf16 + config.fp16 + config.fp32 <= 1
| 939 |          ), "Only one of \"bf16\", \"fp16\", \"fp32\" can be true"
| 940 | 
| 941 |          autoset_precision = config.bf16 + config.fp16 + config.fp32 == 0
| 942 | 
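The assertion relies on Python booleans being integers, so the sum counts how many precision flags are set; autoset_precision is then true exactly when none of them is. A quick standalone check of the idiom:

    bf16, fp16, fp32 = True, False, False
    assert bf16 + fp16 + fp32 <= 1  # at most one explicit precision flag
    autoset_precision = (bf16 + fp16 + fp32 == 0)
    print(autoset_precision)  # False: bf16 was set explicitly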
| ... |
| 989 |              self.lm_head.half()
| 990 |          self.post_init()
| 991 | 
| 992 |      def get_output_embeddings(self):
| 993 |          return self.lm_head
| 994 | 
| ... |
| 998  |      def prepare_inputs_for_generation(
| 999  |          self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs
| 1000 |      ):
| 1001 |          if past_key_values:
| 1002 |              input_ids = input_ids[:, -1].unsqueeze(-1)
| 1003 | 
| 1004 | +        if input_ids.size(0) == 1:
| 1005 | +            attention_mask = None
| 1006 |          else:
| 1007 | +            attention_mask = kwargs.get("attention_mask", None)
| 1008 | 
| 1009 |          if inputs_embeds is not None and past_key_values is None:
| 1010 |              model_inputs = {"inputs_embeds": inputs_embeds}
| ... |
| 1015 |              {
| 1016 |                  "past_key_values": past_key_values,
| 1017 |                  "use_cache": kwargs.get("use_cache"),
| 1018 |                  "attention_mask": attention_mask,
| 1019 |              }
| 1020 |          )
| 1021 |          return model_inputs
| ... |
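During generation this hook is called once per step: after the first step past_key_values is populated, so only the newest token needs to be fed forward. The new input_ids.size(0) == 1 branch also drops the attention mask for single-sequence batches, where no padding can occur. A hypothetical sketch of the calling pattern (the cache value is a stand-in for the real past_key_values):

    import torch

    input_ids = torch.tensor([[101, 2023, 2003]])  # full prompt on the first step
    past_key_values = None

    # Step 1: no cache yet, the whole prompt goes in.
    step1_ids = input_ids if not past_key_values else input_ids[:, -1].unsqueeze(-1)
    print(step1_ids.shape)  # torch.Size([1, 3])

    # Step 2: the cache exists, so only the last token is passed.
    past_key_values = object()  # stand-in for the real KV cache
    step2_ids = input_ids[:, -1].unsqueeze(-1)
    print(step2_ids.shape)  # torch.Size([1, 1])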
| 1286 |          self._ntk_alpha_cached = 1.0
| 1287 |          self._ntk_alpha_cached_list = [1.0]
| 1288 | 
| 1289 | +    def update_rotary_pos_emb_cache(self, seqlen, ntk_alpha=1.0):
| 1290 |          if seqlen > self._seq_len_cached or ntk_alpha != self._ntk_alpha_cached:
| 1291 |              base = self.base * ntk_alpha ** (self.dim / (self.dim - 2))
| 1292 |              self.inv_freq = 1.0 / (
| ... |
| 1309 |              cos, sin = emb.cos(), emb.sin()
| 1310 |              self._rotary_pos_emb_cache = [cos, sin]
| 1311 | 
| 1312 | +    def forward(self, max_seq_len, ntk_alpha=1.0):
| 1313 | +        self.update_rotary_pos_emb_cache(max_seq_len, ntk_alpha)
| 1314 |          cos, sin = self._rotary_pos_emb_cache
| 1315 | +        return [cos[:, :max_seq_len], sin[:, :max_seq_len]]
| 1316 | 
| 1317 | 
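The base rescaling on line 1291 is the NTK-aware context-extension trick: raising the RoPE base by ntk_alpha ** (dim / (dim - 2)) stretches every rotary wavelength so longer sequences still fall inside the trained position range. A standalone sketch of the frequency computation (values are illustrative):

    import torch

    dim, base, ntk_alpha = 128, 10000.0, 2.0
    scaled_base = base * ntk_alpha ** (dim / (dim - 2))
    inv_freq = 1.0 / (
        scaled_base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim)
    )
    print(inv_freq.shape)  # torch.Size([64]): one frequency per rotary pair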
| 1318 |  def _rotate_half(x):
| ... |
| 1324 | 
| 1325 | 
| 1326 |  def apply_rotary_pos_emb(t, freqs):
| 1327 | +    """Apply rotary embedding to the first rotary_dim of the input.
| 1328 | +
| 1329 | +    Arguments:
| 1330 | +      t (tensor(batch_size, seq_len, n_head, head_dim)):
| 1331 | +        the input embedding/hidden states
| 1332 | +      freqs (list[tensor(1, seq_len, 1, rotary_dim), tensor(1, seq_len, 1, rotary_dim)]):
| 1333 | +        the cached cos/sin position embeddings
| 1334 | +    """
| 1335 | +    rot_dim = freqs[0].shape[-1]
| 1336 |      cos, sin = freqs
| 1337 | +    t_float = t.float()
| 1338 |      if apply_rotary_emb_func is not None and t.is_cuda:
| 1339 | +        # apply_rotary_emb in flash_attn requires cos/sin to be of
| 1340 | +        # shape (seqlen, rotary_dim // 2) and applies rotary embedding
| 1341 | +        # to the first rotary_dim of the input
| 1342 | +        cos = cos.squeeze(0).squeeze(1)[:, : rot_dim // 2]
| 1343 | +        sin = sin.squeeze(0).squeeze(1)[:, : rot_dim // 2]
| 1344 | +        return apply_rotary_emb_func(t_float, cos, sin).type_as(t)
| 1345 |      else:
| 1346 | +        t_rot, t_pass = t_float[..., :rot_dim], t_float[..., rot_dim:]
| 1347 | +        t_rot = (t_rot * cos) + (_rotate_half(t_rot) * sin)
| 1348 | +        return torch.cat((t_rot, t_pass), dim=-1).type_as(t)
| 1349 | 
| 1350 | 
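A self-contained check of the fallback (non-flash) branch of apply_rotary_pos_emb, using an equivalent rotate-half helper since the body of _rotate_half is elided above; shapes follow the docstring:

    import torch

    def rotate_half(x):  # equivalent to the file's _rotate_half
        x1, x2 = x.chunk(2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)

    batch, seq_len, n_head, head_dim, rot_dim = 1, 4, 2, 8, 8
    t = torch.randn(batch, seq_len, n_head, head_dim)

    inv_freq = 1.0 / (10000.0 ** (torch.arange(0, rot_dim, 2, dtype=torch.float32) / rot_dim))
    freqs = torch.outer(torch.arange(seq_len, dtype=torch.float32), inv_freq)
    emb = torch.cat((freqs, freqs), dim=-1).view(1, seq_len, 1, rot_dim)
    cos, sin = emb.cos(), emb.sin()

    t_float = t.float()
    t_rot, t_pass = t_float[..., :rot_dim], t_float[..., rot_dim:]
    t_rot = (t_rot * cos) + (rotate_half(t_rot) * sin)
    out = torch.cat((t_rot, t_pass), dim=-1).type_as(t)
    print(out.shape)  # torch.Size([1, 4, 2, 8])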
| 1351 |  class RMSNorm(torch.nn.Module):
    	
        tokenizer_config.json
    CHANGED
    
| @@ -8,3 +8,4 @@ |
| 8  |        ]
| 9  |    }
| 10 |  }
| 11 | +
(The only change is an added trailing whitespace line.)

