├── .github
│   └── workflows
│       └── python-app.yml
├── CITATION.cff
├── LICENSE
├── LLMs-papers.md
├── README.md
├── data
│   ├── 2023-03-28.json
│   ├── 2023-03-29.json
│   ├── 2023-03-30.json
│   ├── 2023-03-31.json
│   ├── 2023-04-01.json
│   ├── 2023-04-02.json
│   ├── 2023-04-03.json
│   ├── 2023-04-04.json
│   ├── 2023-04-05.json
│   ├── 2023-04-06.json
│   ├── 2023-04-07.json
│   ├── 2023-04-08.json
│   ├── 2023-04-09.json
│   ├── 2023-04-10.json
│   ├── 2023-04-11.json
│   ├── 2023-04-13.json
│   ├── 2023-04-14.json
│   ├── 2023-04-16.json
│   ├── 2023-04-17.json
│   ├── 2023-04-23.json
│   ├── 2023-04-25.json
│   ├── 2023-04-30.json
│   ├── 2023-05-06.json
│   ├── 2023-05-07.json
│   ├── 2023-05-14.json
│   ├── 2023-05-21.json
│   ├── 2023-05-28.json
│   ├── 2023-06-03.json
│   ├── 2023-06-04.json
│   ├── 2023-06-11.json
│   ├── 2023-06-18.json
│   ├── 2023-06-25.json
│   ├── 2023-06-30.json
│   ├── 2023-07-02.json
│   ├── 2023-07-09.json
│   ├── 2023-07-16.json
│   ├── 2023-07-23.json
│   ├── 2023-07-30.json
│   ├── 2023-08-06.json
│   ├── 2023-08-10.json
│   ├── 2023-08-13.json
│   └── README.md
├── docs
│   ├── Command
│   │   ├── alpaca-lora.md
│   │   ├── standford-alpaca.md
│   │   ├── test.txt
│   │   └── 命令.txt
│   ├── images
│   │   ├── 13B-100-3.png
│   │   ├── 7b-50-3-new.png
│   │   ├── 7b-50-3.png
│   │   ├── 7b-cs-3.png
│   │   ├── logo.png
│   │   └── usage.png
│   ├── introduction
│   │   ├── 1.png
│   │   ├── 2.png
│   │   ├── 3.png
│   │   └── 介绍.md
│   └── notebook
│       └── chatgentitle_inference_in_colab.ipynb
├── finetune.py
├── generate.py
├── get_arxiv_multiprocessing.py
├── get_daily_llm_paper.py
├── requirements-all.txt
├── requirements.txt
├── templates
│   ├── README.md
│   ├── alpaca.json
│   ├── alpaca_legacy.json
│   ├── alpaca_short.json
│   └── vigogne.json
└── utils
    ├── README.md
    ├── __init__.py
    └── prompter.py

/.github/workflows/python-app.yml:
--------------------------------------------------------------------------------
 1 | name: 'Crawl arXiv paper information'
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron: '0 0 * * 0'
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |   workflow_dispatch:
10 | 
11 | jobs:
12 |   arxiv-crawler:
13 | 
14 |     runs-on: ubuntu-latest
15 | 
16 |     steps:
17 |     - name: 'Checkout'
18 |       uses: actions/checkout@v3
19 | 
20 |     - name: 'Set time zone'
21 |       run: sudo timedatectl set-timezone 'Asia/Shanghai'
22 | 
23 |     - name: 'Set up Python 3.9'
24 |       uses: actions/setup-python@v4
25 |       with:
26 |         python-version: 3.9
27 | 
28 |     - name: 'Install requirements'
29 |       run: pip install feedparser requests
30 | 
31 |     - name: 'Crawl'
32 |       run: python get_arxiv_multiprocessing.py
33 | 
34 |     - name: 'LLMs'
35 |       run: python get_daily_llm_paper.py
36 | 
37 |     - name: 'Detect changes'
38 |       id: detect_changes
39 |       run: if [ -n "$(git status --porcelain)" ]; then echo "changes_detected=yes" >> "$GITHUB_OUTPUT"; fi
40 | 
41 |     - name: 'Push changes'
42 |       if: steps.detect_changes.outputs.changes_detected == 'yes'
43 |       run: |
44 |         git config --global user.email "wrs6@88.com"
45 |         git config --global user.name "wangrongsheng"
46 |         git add .
47 |         git commit -m "* update `date '+%Y-%m-%d %H:%M:%S'`"
48 |         git push
49 |       env:
50 |         GITHUB_TOKEN: ${{ secrets.ACCESS_TOKEN }}
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | title: ChatGenTitle
 3 | message: >-
 4 |   If you use this software, please cite it using these
 5 |   metadata.
 6 | type: software
 7 | authors:
 8 |   - given-names: Rongsheng Wang
 9 |     orcid: https://orcid.org/0000-0003-2390-5999
10 | repository-code: 'https://github.com/WangRongsheng/ChatGenTitle'
11 | url: 'https://github.com/WangRongsheng/ChatGenTitle'
12 | abstract: Paper title generation model fine-tuned on the LLaMA model using metadata from two million arXiv papers
13 | license: CC-BY-NC-SA-4.0
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Attribution-NonCommercial-ShareAlike 4.0 International
  2 | 
  3 | =======================================================================
  4 | 
  5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
  6 | does not provide legal services or legal advice. Distribution of
  7 | Creative Commons public licenses does not create a lawyer-client or
  8 | other relationship. Creative Commons makes its licenses and related
  9 | information available on an "as-is" basis. Creative Commons gives no
 10 | warranties regarding its licenses, any material licensed under their
 11 | terms and conditions, or any related information. Creative Commons
 12 | disclaims all liability for damages resulting from their use to the
 13 | fullest extent possible.
 14 | 
 15 | Using Creative Commons Public Licenses
 16 | 
 17 | Creative Commons public licenses provide a standard set of terms and
 18 | conditions that creators and other rights holders may use to share
 19 | original works of authorship and other material subject to copyright
 20 | and certain other rights specified in the public license below. The
 21 | following considerations are for informational purposes only, are not
 22 | exhaustive, and do not form part of our licenses.
 23 | 
 24 |      Considerations for licensors: Our public licenses are
 25 |      intended for use by those authorized to give the public
 26 |      permission to use material in ways otherwise restricted by
 27 |      copyright and certain other rights. Our licenses are
 28 |      irrevocable. Licensors should read and understand the terms
 29 |      and conditions of the license they choose before applying it.
 30 |      Licensors should also secure all rights necessary before
 31 |      applying our licenses so that the public can reuse the
 32 |      material as expected. Licensors should clearly mark any
 33 |      material not subject to the license. This includes other CC-
 34 |      licensed material, or material used under an exception or
 35 |      limitation to copyright. More considerations for licensors:
 36 |      wiki.creativecommons.org/Considerations_for_licensors
 37 | 
 38 |      Considerations for the public: By using one of our public
 39 |      licenses, a licensor grants the public permission to use the
 40 |      licensed material under specified terms and conditions. If
 41 |      the licensor's permission is not necessary for any reason--for
 42 |      example, because of any applicable exception or limitation to
 43 |      copyright--then that use is not regulated by the license. Our
 44 |      licenses grant only permissions under copyright and certain
 45 |      other rights that a licensor has authority to grant. Use of
 46 |      the licensed material may still be restricted for other
 47 |      reasons, including because others have copyright or other
 48 |      rights in the material. A licensor may make special requests,
 49 |      such as asking that all changes be marked or described.
 50 |      Although not required by our licenses, you are encouraged to
 51 |      respect those requests where reasonable.
More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International 58 | Public License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial-ShareAlike 4.0 International Public License 63 | ("Public License"). To the extent this Public License may be 64 | interpreted as a contract, You are granted the Licensed Rights in 65 | consideration of Your acceptance of these terms and conditions, and the 66 | Licensor grants You such rights in consideration of benefits the 67 | Licensor receives from making the Licensed Material available under 68 | these terms and conditions. 69 | 70 | 71 | Section 1 -- Definitions. 72 | 73 | a. Adapted Material means material subject to Copyright and Similar 74 | Rights that is derived from or based upon the Licensed Material 75 | and in which the Licensed Material is translated, altered, 76 | arranged, transformed, or otherwise modified in a manner requiring 77 | permission under the Copyright and Similar Rights held by the 78 | Licensor. For purposes of this Public License, where the Licensed 79 | Material is a musical work, performance, or sound recording, 80 | Adapted Material is always produced where the Licensed Material is 81 | synched in timed relation with a moving image. 82 | 83 | b. Adapter's License means the license You apply to Your Copyright 84 | and Similar Rights in Your contributions to Adapted Material in 85 | accordance with the terms and conditions of this Public License. 86 | 87 | c. BY-NC-SA Compatible License means a license listed at 88 | creativecommons.org/compatiblelicenses, approved by Creative 89 | Commons as essentially the equivalent of this Public License. 90 | 91 | d. Copyright and Similar Rights means copyright and/or similar rights 92 | closely related to copyright including, without limitation, 93 | performance, broadcast, sound recording, and Sui Generis Database 94 | Rights, without regard to how the rights are labeled or 95 | categorized. For purposes of this Public License, the rights 96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 97 | Rights. 98 | 99 | e. Effective Technological Measures means those measures that, in the 100 | absence of proper authority, may not be circumvented under laws 101 | fulfilling obligations under Article 11 of the WIPO Copyright 102 | Treaty adopted on December 20, 1996, and/or similar international 103 | agreements. 104 | 105 | f. Exceptions and Limitations means fair use, fair dealing, and/or 106 | any other exception or limitation to Copyright and Similar Rights 107 | that applies to Your use of the Licensed Material. 108 | 109 | g. License Elements means the license attributes listed in the name 110 | of a Creative Commons Public License. The License Elements of this 111 | Public License are Attribution, NonCommercial, and ShareAlike. 112 | 113 | h. Licensed Material means the artistic or literary work, database, 114 | or other material to which the Licensor applied this Public 115 | License. 116 | 117 | i. 
Licensed Rights means the rights granted to You subject to the 118 | terms and conditions of this Public License, which are limited to 119 | all Copyright and Similar Rights that apply to Your use of the 120 | Licensed Material and that the Licensor has authority to license. 121 | 122 | j. Licensor means the individual(s) or entity(ies) granting rights 123 | under this Public License. 124 | 125 | k. NonCommercial means not primarily intended for or directed towards 126 | commercial advantage or monetary compensation. For purposes of 127 | this Public License, the exchange of the Licensed Material for 128 | other material subject to Copyright and Similar Rights by digital 129 | file-sharing or similar means is NonCommercial provided there is 130 | no payment of monetary compensation in connection with the 131 | exchange. 132 | 133 | l. Share means to provide material to the public by any means or 134 | process that requires permission under the Licensed Rights, such 135 | as reproduction, public display, public performance, distribution, 136 | dissemination, communication, or importation, and to make material 137 | available to the public including in ways that members of the 138 | public may access the material from a place and at a time 139 | individually chosen by them. 140 | 141 | m. Sui Generis Database Rights means rights other than copyright 142 | resulting from Directive 96/9/EC of the European Parliament and of 143 | the Council of 11 March 1996 on the legal protection of databases, 144 | as amended and/or succeeded, as well as other essentially 145 | equivalent rights anywhere in the world. 146 | 147 | n. You means the individual or entity exercising the Licensed Rights 148 | under this Public License. Your has a corresponding meaning. 149 | 150 | 151 | Section 2 -- Scope. 152 | 153 | a. License grant. 154 | 155 | 1. Subject to the terms and conditions of this Public License, 156 | the Licensor hereby grants You a worldwide, royalty-free, 157 | non-sublicensable, non-exclusive, irrevocable license to 158 | exercise the Licensed Rights in the Licensed Material to: 159 | 160 | a. reproduce and Share the Licensed Material, in whole or 161 | in part, for NonCommercial purposes only; and 162 | 163 | b. produce, reproduce, and Share Adapted Material for 164 | NonCommercial purposes only. 165 | 166 | 2. Exceptions and Limitations. For the avoidance of doubt, where 167 | Exceptions and Limitations apply to Your use, this Public 168 | License does not apply, and You do not need to comply with 169 | its terms and conditions. 170 | 171 | 3. Term. The term of this Public License is specified in Section 172 | 6(a). 173 | 174 | 4. Media and formats; technical modifications allowed. The 175 | Licensor authorizes You to exercise the Licensed Rights in 176 | all media and formats whether now known or hereafter created, 177 | and to make technical modifications necessary to do so. The 178 | Licensor waives and/or agrees not to assert any right or 179 | authority to forbid You from making technical modifications 180 | necessary to exercise the Licensed Rights, including 181 | technical modifications necessary to circumvent Effective 182 | Technological Measures. For purposes of this Public License, 183 | simply making modifications authorized by this Section 2(a) 184 | (4) never produces Adapted Material. 185 | 186 | 5. Downstream recipients. 187 | 188 | a. Offer from the Licensor -- Licensed Material. 
Every 189 | recipient of the Licensed Material automatically 190 | receives an offer from the Licensor to exercise the 191 | Licensed Rights under the terms and conditions of this 192 | Public License. 193 | 194 | b. Additional offer from the Licensor -- Adapted Material. 195 | Every recipient of Adapted Material from You 196 | automatically receives an offer from the Licensor to 197 | exercise the Licensed Rights in the Adapted Material 198 | under the conditions of the Adapter's License You apply. 199 | 200 | c. No downstream restrictions. You may not offer or impose 201 | any additional or different terms or conditions on, or 202 | apply any Effective Technological Measures to, the 203 | Licensed Material if doing so restricts exercise of the 204 | Licensed Rights by any recipient of the Licensed 205 | Material. 206 | 207 | 6. No endorsement. Nothing in this Public License constitutes or 208 | may be construed as permission to assert or imply that You 209 | are, or that Your use of the Licensed Material is, connected 210 | with, or sponsored, endorsed, or granted official status by, 211 | the Licensor or others designated to receive attribution as 212 | provided in Section 3(a)(1)(A)(i). 213 | 214 | b. Other rights. 215 | 216 | 1. Moral rights, such as the right of integrity, are not 217 | licensed under this Public License, nor are publicity, 218 | privacy, and/or other similar personality rights; however, to 219 | the extent possible, the Licensor waives and/or agrees not to 220 | assert any such rights held by the Licensor to the limited 221 | extent necessary to allow You to exercise the Licensed 222 | Rights, but not otherwise. 223 | 224 | 2. Patent and trademark rights are not licensed under this 225 | Public License. 226 | 227 | 3. To the extent possible, the Licensor waives any right to 228 | collect royalties from You for the exercise of the Licensed 229 | Rights, whether directly or through a collecting society 230 | under any voluntary or waivable statutory or compulsory 231 | licensing scheme. In all other cases the Licensor expressly 232 | reserves any right to collect such royalties, including when 233 | the Licensed Material is used other than for NonCommercial 234 | purposes. 235 | 236 | 237 | Section 3 -- License Conditions. 238 | 239 | Your exercise of the Licensed Rights is expressly made subject to the 240 | following conditions. 241 | 242 | a. Attribution. 243 | 244 | 1. If You Share the Licensed Material (including in modified 245 | form), You must: 246 | 247 | a. retain the following if it is supplied by the Licensor 248 | with the Licensed Material: 249 | 250 | i. identification of the creator(s) of the Licensed 251 | Material and any others designated to receive 252 | attribution, in any reasonable manner requested by 253 | the Licensor (including by pseudonym if 254 | designated); 255 | 256 | ii. a copyright notice; 257 | 258 | iii. a notice that refers to this Public License; 259 | 260 | iv. a notice that refers to the disclaimer of 261 | warranties; 262 | 263 | v. a URI or hyperlink to the Licensed Material to the 264 | extent reasonably practicable; 265 | 266 | b. indicate if You modified the Licensed Material and 267 | retain an indication of any previous modifications; and 268 | 269 | c. indicate the Licensed Material is licensed under this 270 | Public License, and include the text of, or the URI or 271 | hyperlink to, this Public License. 272 | 273 | 2. 
You may satisfy the conditions in Section 3(a)(1) in any 274 | reasonable manner based on the medium, means, and context in 275 | which You Share the Licensed Material. For example, it may be 276 | reasonable to satisfy the conditions by providing a URI or 277 | hyperlink to a resource that includes the required 278 | information. 279 | 3. If requested by the Licensor, You must remove any of the 280 | information required by Section 3(a)(1)(A) to the extent 281 | reasonably practicable. 282 | 283 | b. ShareAlike. 284 | 285 | In addition to the conditions in Section 3(a), if You Share 286 | Adapted Material You produce, the following conditions also apply. 287 | 288 | 1. The Adapter's License You apply must be a Creative Commons 289 | license with the same License Elements, this version or 290 | later, or a BY-NC-SA Compatible License. 291 | 292 | 2. You must include the text of, or the URI or hyperlink to, the 293 | Adapter's License You apply. You may satisfy this condition 294 | in any reasonable manner based on the medium, means, and 295 | context in which You Share Adapted Material. 296 | 297 | 3. You may not offer or impose any additional or different terms 298 | or conditions on, or apply any Effective Technological 299 | Measures to, Adapted Material that restrict exercise of the 300 | rights granted under the Adapter's License You apply. 301 | 302 | 303 | Section 4 -- Sui Generis Database Rights. 304 | 305 | Where the Licensed Rights include Sui Generis Database Rights that 306 | apply to Your use of the Licensed Material: 307 | 308 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 309 | to extract, reuse, reproduce, and Share all or a substantial 310 | portion of the contents of the database for NonCommercial purposes 311 | only; 312 | 313 | b. if You include all or a substantial portion of the database 314 | contents in a database in which You have Sui Generis Database 315 | Rights, then the database in which You have Sui Generis Database 316 | Rights (but not its individual contents) is Adapted Material, 317 | including for purposes of Section 3(b); and 318 | 319 | c. You must comply with the conditions in Section 3(a) if You Share 320 | all or a substantial portion of the contents of the database. 321 | 322 | For the avoidance of doubt, this Section 4 supplements and does not 323 | replace Your obligations under this Public License where the Licensed 324 | Rights include other Copyright and Similar Rights. 325 | 326 | 327 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 328 | 329 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 330 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 331 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 332 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 333 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 334 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 335 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 336 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 337 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 338 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 339 | 340 | b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 341 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 342 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 343 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 344 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 345 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 346 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 347 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 348 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 349 | 350 | c. The disclaimer of warranties and limitation of liability provided 351 | above shall be interpreted in a manner that, to the extent 352 | possible, most closely approximates an absolute disclaimer and 353 | waiver of all liability. 354 | 355 | 356 | Section 6 -- Term and Termination. 357 | 358 | a. This Public License applies for the term of the Copyright and 359 | Similar Rights licensed here. However, if You fail to comply with 360 | this Public License, then Your rights under this Public License 361 | terminate automatically. 362 | 363 | b. Where Your right to use the Licensed Material has terminated under 364 | Section 6(a), it reinstates: 365 | 366 | 1. automatically as of the date the violation is cured, provided 367 | it is cured within 30 days of Your discovery of the 368 | violation; or 369 | 370 | 2. upon express reinstatement by the Licensor. 371 | 372 | For the avoidance of doubt, this Section 6(b) does not affect any 373 | right the Licensor may have to seek remedies for Your violations 374 | of this Public License. 375 | 376 | c. For the avoidance of doubt, the Licensor may also offer the 377 | Licensed Material under separate terms or conditions or stop 378 | distributing the Licensed Material at any time; however, doing so 379 | will not terminate this Public License. 380 | 381 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 382 | License. 383 | 384 | 385 | Section 7 -- Other Terms and Conditions. 386 | 387 | a. The Licensor shall not be bound by any additional or different 388 | terms or conditions communicated by You unless expressly agreed. 389 | 390 | b. Any arrangements, understandings, or agreements regarding the 391 | Licensed Material not stated herein are separate from and 392 | independent of the terms and conditions of this Public License. 393 | 394 | 395 | Section 8 -- Interpretation. 396 | 397 | a. For the avoidance of doubt, this Public License does not, and 398 | shall not be interpreted to, reduce, limit, restrict, or impose 399 | conditions on any use of the Licensed Material that could lawfully 400 | be made without permission under this Public License. 401 | 402 | b. To the extent possible, if any provision of this Public License is 403 | deemed unenforceable, it shall be automatically reformed to the 404 | minimum extent necessary to make it enforceable. If the provision 405 | cannot be reformed, it shall be severed from this Public License 406 | without affecting the enforceability of the remaining terms and 407 | conditions. 408 | 409 | c. No term or condition of this Public License will be waived and no 410 | failure to comply consented to unless expressly agreed to by the 411 | Licensor. 412 | 413 | d. 
Nothing in this Public License constitutes or may be interpreted 414 | as a limitation upon, or waiver of, any privileges and immunities 415 | that apply to the Licensor or You, including from the legal 416 | processes of any jurisdiction or authority. 417 | 418 | ======================================================================= 419 | 420 | Creative Commons is not a party to its public 421 | licenses. Notwithstanding, Creative Commons may elect to apply one of 422 | its public licenses to material it publishes and in those instances 423 | will be considered the “Licensor.” The text of the Creative Commons 424 | public licenses is dedicated to the public domain under the CC0 Public 425 | Domain Dedication. Except for the limited purpose of indicating that 426 | material is shared under a Creative Commons public license or as 427 | otherwise permitted by the Creative Commons policies published at 428 | creativecommons.org/policies, Creative Commons does not authorize the 429 | use of the trademark "Creative Commons" or any other trademark or logo 430 | of Creative Commons without its prior written consent including, 431 | without limitation, in connection with any unauthorized modifications 432 | to any of its public licenses or any other arrangements, 433 | understandings, or agreements concerning use of licensed material. For 434 | the avoidance of doubt, this paragraph does not form part of the 435 | public licenses. 436 | 437 | Creative Commons may be contacted at creativecommons.org. 438 | -------------------------------------------------------------------------------- /LLMs-papers.md: -------------------------------------------------------------------------------- 1 | 1. [Lost in Translation: Large Language Models in Non-English Content 2 | Analysis](http://arxiv.org/abs/2306.07377v1) 3 | 2. [Cedille: A large autoregressive French language model](http://arxiv.org/abs/2202.03371v1) 4 | 3. [How Good are Commercial Large Language Models on African Languages?](http://arxiv.org/abs/2305.06530v1) 5 | 4. [A Precis of Language Models are not Models of Language](http://arxiv.org/abs/2205.07634v1) 6 | 5. [Using large language models for (de-)formalization and natural 7 | argumentation exercises for beginner's students](http://arxiv.org/abs/2304.06186v1) 8 | 6. [Should we Stop Training More Monolingual Models, and Simply Use Machine 9 | Translation Instead?](http://arxiv.org/abs/2104.10441v1) 10 | 7. [Beyond the limitations of any imaginable mechanism: large language 11 | models and psycholinguistics](http://arxiv.org/abs/2303.00077v1) 12 | 8. [Enhance Reasoning Ability of Visual-Language Models via Large Language 13 | Models](http://arxiv.org/abs/2305.13267v1) 14 | 9. [Images in Language Space: Exploring the Suitability of Large Language 15 | Models for Vision & Language Tasks](http://arxiv.org/abs/2305.13782v1) 16 | 10. [When Being Unseen from mBERT is just the Beginning: Handling New 17 | Languages With Multilingual Language Models](http://arxiv.org/abs/2010.12858v2) 18 | 11. [Dynamic Fusion: Attentional Language Model for Neural Machine 19 | Translation](http://arxiv.org/abs/1909.04879v1) 20 | 12. [Large Language Models are not Models of Natural Language: they are 21 | Corpus Models](http://arxiv.org/abs/2112.07055v2) 22 | 13. [A Sentence is Worth a Thousand Pictures: Can Large Language Models 23 | Understand Human Language?](http://arxiv.org/abs/2308.00109v1) 24 | 14. 
[BigTranslate: Augmenting Large Language Models with Multilingual
 25 | Translation Capability over 100 Languages](http://arxiv.org/abs/2305.18098v2)
 26 | 15. [Re-visiting Automated Topic Model Evaluation with Large Language Models](http://arxiv.org/abs/2305.12152v1)
 27 | 16. [Unsupervised Improvement of Factual Knowledge in Language Models](http://arxiv.org/abs/2304.01597v1)
 28 | 17. [UIO at SemEval-2023 Task 12: Multilingual fine-tuning for sentiment
 29 | classification in low-resource languages](http://arxiv.org/abs/2304.14189v1)
 30 | 18. [Multilingual Text Classification for Dravidian Languages](http://arxiv.org/abs/2112.01705v1)
 31 | 19. [Can Large Language Models design a Robot?](http://arxiv.org/abs/2303.15324v1)
 32 | 20. [Jigsaw: Large Language Models meet Program Synthesis](http://arxiv.org/abs/2112.02969v1)
 33 | 21. [HinFlair: pre-trained contextual string embeddings for pos tagging and
 34 | text classification in the Hindi language](http://arxiv.org/abs/2101.06949v1)
 35 | 22. [Larger-Scale Transformers for Multilingual Masked Language Modeling](http://arxiv.org/abs/2105.00572v1)
 36 | 23. [Exploring Cross-lingual Textual Style Transfer with Large Multilingual
 37 | Language Models](http://arxiv.org/abs/2206.02252v1)
 38 | 24. [In What Languages are Generative Language Models the Most Formal?
 39 | Analyzing Formality Distribution across Languages](http://arxiv.org/abs/2302.12299v1)
 40 | 25. [Low-Resource Language Modelling of South African Languages](http://arxiv.org/abs/2104.00772v1)
 41 | 26. [Dynamic Large Language Models on Blockchains](http://arxiv.org/abs/2307.10549v1)
 42 | 27. [Reimagining Retrieval Augmented Language Models for Answering Queries](http://arxiv.org/abs/2306.01061v1)
 43 | 28. [LEALLA: Learning Lightweight Language-agnostic Sentence Embeddings with
 44 | Knowledge Distillation](http://arxiv.org/abs/2302.08387v1)
 45 | 29. [Benchmarking Language Models for Code Syntax Understanding](http://arxiv.org/abs/2210.14473v1)
 46 | 30. [Can Character-based Language Models Improve Downstream Task Performance
 47 | in Low-Resource and Noisy Language Scenarios?](http://arxiv.org/abs/2110.13658v1)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | > **Note**
 2 | > 
 3 | > **ChatPaper has summarized 50,000+ top-conference papers from the past five years and can help smooth your research journey: https://chatpaper.org/**
 4 | 
 5 | 
6 | 7 | Logo 8 | 9 | 10 |

11 |

ChatGenTitle: a paper-title generation model fine-tuned on the LLaMA model with metadata from millions of arXiv papers

12 |

13 | 14 | GitHub Contributors 15 | 16 | 17 | GitHub Contributors 18 | 19 | 20 | Issues 21 | 22 | 23 | GitHub pull requests 24 | 25 | 26 | 27 | 28 |

29 |
30 | 31 |
32 | 33 |

34 | One-stop service / Simple / Fast / Efficient / Intelligent
36 | Video tutorial
 37 | Installation & deployment
 38 | Online demo

40 | 
 41 | # News
 42 | 
 43 | - 🎉🎉 The training dataset is available at [Cornell-University/arxiv](https://www.kaggle.com/datasets/Cornell-University/arxiv) and can be used directly;
 44 | - 🎉🎉 All models are open-sourced on [HuggingFace](https://huggingface.co/wangrongsheng) and can be used directly;
 45 | - 🎉🎉 Everyone can try ChatGenTitle online for free, Open In Colab ;
 46 | - 🎉🎉 Due to a lack of GPU compute, we released all code and weights of the [online deployment version](https://github.com/WangRongsheng/ChatGenTitle/releases/tag/LLaMa-Lora-7B-cs-6-new-app), which can be deployed in any environment;
 47 | - 🎉🎉 arXiv produces a large volume of LLM-related work every day; this repository automatically pushes 30 LLM-related papers daily for you to study, [read today's LLM papers](https://github.com/WangRongsheng/ChatGenTitle/blob/main/LLMs-papers.md) ;
 48 | - 🎉🎉 Officially released the [LLaMa-Lora-7B-3](https://drive.google.com/file/d/1c1uUizHP7jatrj6GxtppGYgZSKPWSExs/view?usp=sharing) and [LLaMa-Lora-7B-3-new](https://drive.google.com/file/d/1AuxbIzMXLX89TUPQTrEF2K-IyhF3OKiZ/view?usp=sharing) LoRA model weights, which can be deployed locally;
 49 | - 🎉🎉 Completed fine-tuning of the `LLaMa-Lora-7B-3` and `LLaMa-Lora-13B-3` models based on [alpaca-lora](https://github.com/tloen/alpaca-lora);
 50 | - 🎉🎉 Started a long-running task that periodically crawls [cs.AI](http://export.arxiv.org/rss/cs.AI), [cs.CV](http://export.arxiv.org/rss/cs.CV), and [cs.LG](http://export.arxiv.org/rss/cs.LG) papers from `arXiv` to support research in CS-related directions;
 51 | - 🎉🎉 Collected the metadata of `2.2M+` arXiv papers; this metadata includes `title` and `abstract`, plus `id`, `submitter`, `authors`, `comments`, `journal-ref`, `doi`, `categories`, and `versions`;
 52 | 
 53 | ## TODO
 54 | 
 55 | * [X] Finish the tutorial on LoRA fine-tuning of large models, [Enjoy it!!!](https://www.philschmid.de/fine-tune-flan-t5-peft)
 56 | * [ ] Publish on arXiv (coming soon...)
 57 | * [X] Finish the comparison of ChatGenTitle, ChatGPT, and GPT-4
 58 | * [X] Release the online version, [LLaMa-Lora-7B-cs-6-new-app](https://github.com/WangRongsheng/ChatGenTitle/releases/tag/LLaMa-Lora-7B-cs-6-new-app) Open In Colab
 59 | 
 60 | # Release
 61 | 
 62 | > **Note**
 63 | > 
 64 | > The LLaMA model released by Meta does not permit commercial use, so what we open-source here are LoRA models. A LoRA model must be paired with the corresponding version of the LLaMA base model; for details see [Chinese-LLaMA-Alpaca#merge-models](https://github.com/ymcui/Chinese-LLaMA-Alpaca#%E5%90%88%E5%B9%B6%E6%A8%A1%E5%9E%8B). (A minimal loading sketch follows the table.)
 66 | 
 67 | |Model|Fine-tuning data|Base model|Size|Fine-tuning time|Result|
 68 | |:-|:-|:-|:-|:-|:-|
 69 | |✅[LLaMa-Lora-7B-3](https://huggingface.co/wangrongsheng/chatgentitle-lora-all-3)|arXiv-50-all|LLaMa-7B|-MB|9 hours|[view](https://github.com/WangRongsheng/ChatGenTitle/blob/main/docs/images/7b-50-3-new.png)|
 70 | |✅[LLaMa-Lora-7B-3-new](https://huggingface.co/wangrongsheng/chatgentitle-lora-all-3-new) |arXiv-50-all|LLaMa-7B|-MB|12.5 hours|[view](https://github.com/WangRongsheng/ChatGenTitle/blob/main/docs/images/7b-50-3-new.png)|
 71 | |✅[LLaMa-Lora-7B-cs-3-new](https://huggingface.co/wangrongsheng/chatgentitle-lora-cs-3-new) |arXiv-cs |LLaMa-7B|-MB|20.5 hours|[view](https://github.com/WangRongsheng/ChatGenTitle/blob/main/docs/images/7b-cs-3.png)|
 72 | |✅[LLaMa-Lora-7B-cs-6-new](https://huggingface.co/wangrongsheng/chatgentitle-lora-cs-6-new) |arXiv-cs|LLaMa-7B|-MB|34 hours|[view](https://github.com/WangRongsheng/ChatGenTitle/blob/main/docs/images/usage.png)|
 73 | |✅[LLaMa-Lora-13B-3](https://huggingface.co/wangrongsheng/chatgentitle-lora-all-13B-3) |arXiv-100-all|LLaMa-13B|-MB|26 hours|[view](https://github.com/WangRongsheng/ChatGenTitle/blob/main/docs/images/13B-100-3.png)|
 74 | 
 75 | 
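As a quick illustration of how the LoRA weights above pair with a LLaMA base at inference time, here is a minimal sketch. This is a hedged example, not the repo's exact `generate.py`: it assumes the `transformers` and `peft` libraries, the prompt template mirrors `templates/alpaca.json`, and the paths and generation settings are illustrative.

```python
# A minimal sketch, assuming the transformers and peft libraries.
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import PeftModel

base_path = "../model/7B-hf"  # your converted LLaMA-7B weights (see docs/Command)
tokenizer = LlamaTokenizer.from_pretrained(base_path)
model = LlamaForCausalLM.from_pretrained(base_path, torch_dtype=torch.float16, device_map="auto")
model = PeftModel.from_pretrained(model, "wangrongsheng/chatgentitle-lora-cs-6-new")

abstract = "Paste the paper abstract here."
# Alpaca-style prompt: the instruction acts as the system role, the input is the abstract.
prompt = (
    "Below is an instruction that describes a task, paired with an input that provides "
    "further context. Write a response that appropriately completes the request.\n\n"
    "### Instruction:\nIf you are an expert in writing papers, please generate a good paper "
    "title for this paper based on other authors' descriptions of their abstracts.\n\n"
    "### Input:\n" + abstract + "\n\n### Response:\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
# Decode only the newly generated tokens, i.e. the suggested title.
print(tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```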
76 | Training setup
 77 | 

* Experiments were run on A100 GPUs (4X, 80GB)

78 |
79 | 
 80 | # Comparison
 81 | 
 82 | |Prompt|Abstract|Original paper title|**ChatGenTitle**|ChatGPT (GPT-3.5)|GPT-4|ChatGLM (130B)|
 83 | |:-|:-|:-|:-|:-|:-|:-|
 84 | |Prompt ①|Abstract ①|Focus-RCNet: A lightweight recyclable waste classification algorithm based on Focus and knowledge distillation|Focus-RCNet: A Lightweight Convolutional Neural Network for Recyclable Waste Image Classification|Focus-RCNet: A lightweight deep learning model for automated waste classification with enhanced recyclable waste image feature recognition|Efficient Waste Classification with Focus-RCNet: A Lightweight Deep Learning Architecture Employing Sandglass Structure, SimAM Attention Mechanism, and Knowledge Distillation for Real-Time Embedded Applications|Input exceeds the token limit|
 85 | |Prompt ②|Abstract ②|ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices|ShuffleNet: Efficient Convolutional Neural Networks for Mobile Devices|ShuffleNet: A Computation-Efficient CNN Architecture for Mobile Devices with Superior Performance in Object Detection and ImageNet Classification while Maintaining Accuracy|ShuffleNet: A Computationally Efficient CNN Architecture for Mobile Devices with Enhanced Performance in ImageNet Classification and MS COCO Object Detection|ShuffleNet: An Extremely Computation-Efficient CNN Architecture for Mobile Devices|
 86 | |Prompt ③|Abstract ③|Segment Anything|Segment Anything|Segment Anything: Introducing a New Task, Model, and Dataset for Promptable Image Segmentation with Superior Zero-Shot Performance|Exploring the Segment Anything Project: A Promptable Image Segmentation Model and Extensive Dataset with Impressive Zero-Shot Performance|Segment Anything (SA) Project: A New Task, Model, and Dataset for Image Segmentation|
 87 | 
 88 | 
89 | 1. Prompt ① and Abstract ①
 90 | 
 91 | - Prompt ①: If you are an expert in writing papers, please generate a good paper title for this paper based on other authors' descriptions of their abstracts.
 92 | - Abstract ①: Waste pollution is one of the most important environmental problems in the modern world. With the continuous improvement of the living standard of the population and the increasing richness of the consumption structure, the amount of domestic waste generated has increased dramatically and there is an urgent need for further waste treatment of waste. The rapid development of artificial intelligence provides an effective solution for automated waste classification. However, the large computational power and high complexity of algorithms make convolutional neural networks (CNNs) unsuitable for real-time embedded applications. In this paper, we propose a lightweight network architecture, Focus-RCNet, designed with reference to the sandglass structure of MobileNetV2, which uses deeply separable convolution to extract features from images. The Focus module is introduced into the field of recyclable waste image classification to reduce the dimensionality of features while retaining relevant information. In order to make the model focus more on waste image features while keeping the amount of parameters computationally small, we introduce the SimAM attention mechanism. Additionally, knowledge distillation is used to further compress the number of parameters in the model. By training and testing on the TrashNet dataset, the Focus-RCNet model not only achieves an accuracy of 92%, but also has high mobility of deployment.
 93 | 
95 | 96 |
97 | 2. Prompt ② and Abstract ②
 98 | 
 99 | - Prompt ②: If you are an expert in writing papers, please generate a good paper title for this paper based on other authors' descriptions of their abstracts.
 100 | - Abstract ②: We introduce an extremely computation-efficient CNN architecture named ShuffleNet, which is designed specially for mobile devices with very limited computing power (e.g., 10-150 MFLOPs). The new architecture utilizes two new operations, pointwise group convolution and channel shuffle, to greatly reduce computation cost while maintaining accuracy. Experiments on ImageNet classification and MS COCO object detection demonstrate the superior performance of ShuffleNet over other structures, e.g. lower top-1 error (absolute 7.8%) than recent MobileNet on ImageNet classification task, under the computation budget of 40 MFLOPs. On an ARM-based mobile device, ShuffleNet achieves ~13x actual speedup over AlexNet while maintaining comparable accuracy.
 101 | 
103 | 104 |
105 | 3. Prompt ③ and Abstract ③
 106 | 
 107 | - Prompt ③: If you are an expert in writing papers, please generate a good paper title for this paper based on other authors' descriptions of their abstracts.
 108 | - Abstract ③: We introduce the Segment Anything (SA) project: a new task, model, and dataset for image segmentation. Using our efficient model in a data collection loop, we built the largest segmentation dataset to date (by far), with over 1 billion masks on 11M licensed and privacy respecting images. The model is designed and trained to be promptable, so it can transfer zero-shot to new image distributions and tasks. We evaluate its capabilities on numerous tasks and find that its zero-shot performance is impressive -- often competitive with or even superior to prior fully supervised results. We are releasing the Segment Anything Model (SAM) and corresponding dataset (SA-1B) of 1B masks and 11M images.
 109 | 
110 | 
 111 | 
 112 | # Reference
 113 | 
 114 | > **Note**
 115 | > 
 116 | > The field keeps moving, and so do large language models (LLMs). Read 30 of the latest LLM papers here every day to make sure your knowledge keeps up!
 117 | > 
 118 | > 👉👉👉[**Read today's LLM papers**](https://github.com/WangRongsheng/ChatGenTitle/blob/main/LLMs-papers.md)
 119 | 
 120 | - [stanford_alpaca](https://github.com/tatsu-lab/stanford_alpaca)
 121 | - [alpaca-lora](https://github.com/tloen/alpaca-lora)
 122 | - [ChatDoctor](https://github.com/Kent0n-Li/ChatDoctor)
 123 | - [Chinese-alpaca-lora](https://github.com/LC1332/Chinese-alpaca-lora)
 124 | - [cabrita](https://github.com/22-hours/cabrita)
 125 | - [japanese-alpaca-lora](https://github.com/masa3141/japanese-alpaca-lora)
 126 | - [Chinese-LLaMA-Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
 127 | - [FastChat](https://github.com/lm-sys/FastChat)
 128 | - [LLaMA-Adapter](https://github.com/ZrrSkywalker/LLaMA-Adapter)
 129 | - [LMFlow](https://github.com/OptimalScale/LMFlow)
 130 | - [CSL: Chinese Scientific Literature dataset](https://github.com/ydli-ai/CSL)
 131 | 
 132 | # Knowledge
 133 | 
 134 | 
135 | 1. On instruct fine-tuning and LoRA fine-tuning
 136 | 
 137 | > Instruct fine-tuning and LoRA fine-tuning are two different techniques.
 138 | > Instruct fine-tuning adjusts a pretrained model's parameters on new, instruction-formatted data to optimize its performance: a ready-made pretrained model serves as the base and is then fine-tuned on the new dataset. **Instruct fine-tuning updates all parameters of the pretrained model, adapting it to many downstream applications.**
 139 | > LoRA (Low-Rank Adaptation) fine-tuning instead freezes the pretrained weights. **LoRA injects small trainable low-rank layers into each Transformer block; because gradients need not be computed for most of the model weights, it greatly reduces the number of trainable parameters and lowers GPU memory requirements.** (Note that this LoRA has nothing to do with the LoRa radio protocol used in LoRaWAN networks.)
 140 | > **Studies have found that LoRA fine-tuning matches full-model fine-tuning in quality while being faster and requiring less compute. LoRA fine-tuning is therefore recommended when low latency and low memory use matter.** A code sketch follows this item.
 141 | 
143 | 144 |
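To make item 1 concrete, here is a minimal sketch of how LoRA injects trainable low-rank layers with the `peft` library. The rank and target modules mirror this repo's fine-tuning flags (`--lora_r=16`, `--lora_target_modules='[q_proj,k_proj,v_proj,o_proj]'`); the other hyperparameters and the model path are illustrative assumptions, not the exact training configuration.

```python
# A minimal sketch, assuming the peft and transformers libraries.
from transformers import LlamaForCausalLM
from peft import LoraConfig, get_peft_model

model = LlamaForCausalLM.from_pretrained("../model/7B-hf")  # converted LLaMA-7B weights

lora_config = LoraConfig(
    r=16,               # rank of the low-rank update matrices (--lora_r=16)
    lora_alpha=16,      # scaling factor applied to the LoRA updates (assumed value)
    lora_dropout=0.05,  # dropout on the LoRA layers (assumed value)
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # attention projections to adapt
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# Only the injected low-rank matrices require gradients; the base weights stay frozen,
# which is why LoRA needs far less GPU memory than full fine-tuning.
model.print_trainable_parameters()
```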
145 | 2. Why are there both LLaMA models and LoRA models?
 146 | 
 147 | > As described in 1, there are many ways to fine-tune a model. LoRA-based fine-tuning produces and saves new weights, and the generated LoRA weights can be regarded as a [patch](https://github.com/ymcui/Chinese-LLaMA-Alpaca#%EF%B8%8F-%E7%94%A8%E6%88%B7%E9%A1%BB%E7%9F%A5%E5%BF%85%E8%AF%BB) on top of the original LLaMA model. The [LLaMA](https://github.com/facebookresearch/llama) weights themselves are the pretrained large-model weights open-sourced by Meta. A merging sketch follows this item.
 148 | 
150 | 151 | 152 |
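Following the "patch" analogy in item 2, here is a minimal sketch of applying a released LoRA patch to a LLaMA base and optionally merging the two into a standalone checkpoint (assuming the `peft` and `transformers` libraries; the adapter id is one of the weights from the Release table, and the paths are illustrative):

```python
# A minimal sketch, assuming the peft and transformers libraries.
from transformers import LlamaForCausalLM
from peft import PeftModel

base = LlamaForCausalLM.from_pretrained("../model/7B-hf")  # base LLaMA weights
# Apply the LoRA "patch" on top of the frozen base weights.
patched = PeftModel.from_pretrained(base, "wangrongsheng/chatgentitle-lora-all-3")

# Optionally fold the patch into the base to obtain a single standalone model.
merged = patched.merge_and_unload()
merged.save_pretrained("../model/7B-chatgentitle-merged")
```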
153 | 3. On vocabulary expansion
 154 | 
 155 | > Expanding the vocabulary is somewhat destructive: first, it breaks the original tokenization scheme, and second, it adds untrained weights. Without sufficient further training this can cause serious problems. Personally, unless the domain is highly specialized (e.g., biomedicine and other fields full of technical terms), there is little need to expand the English vocabulary. [Chinese-LLaMA-Alpaca/issues/16](https://github.com/ymcui/Chinese-LLaMA-Alpaca/issues/16)
 156 | 
157 | 
 158 | 
 159 | 
 160 | 
 161 | # LICENSE
 162 | 
 163 | This work is licensed under a
 164 | [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License][cc-by-nc-sa].
 165 | 
 166 | **Usage and license notice**: ChatGenTitle is available for research use only, under the license. **It may be used solely for scientific research, not for actual paper writing; otherwise, the user bears all resulting consequences!!!**
 167 | 
 168 | [![CC BY-NC-SA 4.0][cc-by-nc-sa-image]][cc-by-nc-sa]
 169 | 
 170 | [cc-by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/
 171 | [cc-by-nc-sa-image]: https://licensebuttons.net/l/by-nc-sa/4.0/88x31.png
 172 | [cc-by-nc-sa-shield]: https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg
 173 | 
 174 | 
 175 | # [![Repography logo](https://images.repography.com/logo.svg)](https://repography.com) / Recent activity [![Time period](https://images.repography.com/36500635/WangRongsheng/ChatGenTitle/recent-activity/L-1AGU6P13WbZ-SijIsJU-AgwfWmr0cwDwS-jixfjts/GOKgvyNJJRgkRPw9WsbDdmuSHTBrcD0AYA0WD0WDMIA_badge.svg)](https://repography.com)
 176 | [![Timeline graph](https://images.repography.com/36500635/WangRongsheng/ChatGenTitle/recent-activity/L-1AGU6P13WbZ-SijIsJU-AgwfWmr0cwDwS-jixfjts/GOKgvyNJJRgkRPw9WsbDdmuSHTBrcD0AYA0WD0WDMIA_timeline.svg)](https://github.com/WangRongsheng/ChatGenTitle/commits)
 177 | [![Issue status graph](https://images.repography.com/36500635/WangRongsheng/ChatGenTitle/recent-activity/L-1AGU6P13WbZ-SijIsJU-AgwfWmr0cwDwS-jixfjts/GOKgvyNJJRgkRPw9WsbDdmuSHTBrcD0AYA0WD0WDMIA_issues.svg)](https://github.com/WangRongsheng/ChatGenTitle/issues)
 178 | [![Pull request status graph](https://images.repography.com/36500635/WangRongsheng/ChatGenTitle/recent-activity/L-1AGU6P13WbZ-SijIsJU-AgwfWmr0cwDwS-jixfjts/GOKgvyNJJRgkRPw9WsbDdmuSHTBrcD0AYA0WD0WDMIA_prs.svg)](https://github.com/WangRongsheng/ChatGenTitle/pulls)
 179 | [![Trending topics](https://images.repography.com/36500635/WangRongsheng/ChatGenTitle/recent-activity/L-1AGU6P13WbZ-SijIsJU-AgwfWmr0cwDwS-jixfjts/GOKgvyNJJRgkRPw9WsbDdmuSHTBrcD0AYA0WD0WDMIA_words.svg)](https://github.com/WangRongsheng/ChatGenTitle/commits)
 180 | [![Top contributors](https://images.repography.com/36500635/WangRongsheng/ChatGenTitle/recent-activity/L-1AGU6P13WbZ-SijIsJU-AgwfWmr0cwDwS-jixfjts/GOKgvyNJJRgkRPw9WsbDdmuSHTBrcD0AYA0WD0WDMIA_users.svg)](https://github.com/WangRongsheng/ChatGenTitle/graphs/contributors)
 181 | [![Activity map](https://images.repography.com/36500635/WangRongsheng/ChatGenTitle/recent-activity/L-1AGU6P13WbZ-SijIsJU-AgwfWmr0cwDwS-jixfjts/GOKgvyNJJRgkRPw9WsbDdmuSHTBrcD0AYA0WD0WDMIA_map.svg)](https://github.com/WangRongsheng/ChatGenTitle/commits)
 182 | 
 183 | 
 184 | 
 185 | # Stargazers
 186 | 
 187 | [![Stargazers over time](https://starchart.cc/WangRongsheng/ChatGenTitle.svg)](https://starchart.cc/WangRongsheng/ChatGenTitle)
 188 | 
 189 | 
Go for it! 190 |
191 |

Feel free to ask any questions, open a PR if you feel something can be done differently!

192 |

🌟 Star this repository 🌟

193 |

Created by WangRongsheng

194 | 
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
 1 | Data storage:
 2 | 
 3 | - Saved by date
 4 | - Storage format:
 5 | ```json
 6 | {
 7 |     "instruction": "If you are a doctor, please answer the medical questions based on the patient's description.",
 8 |     "input": "Doctor, I have been experiencing a hoarse voice for a few weeks now and it's not getting any better despite taking medication. What could be the problem?",
 9 |     "output": "It's possible that you have a vocal cord polyp. To confirm this, we need to conduct tracheoscopy and laryngoscopy with a biopsy. We may also need to conduct an occupational therapy assessment such as speech therapy, other diagnostic procedures like an interview or consultation, physical therapy exercises, as well as diagnostic procedures on your nose, mouth, and pharynx to rule out any other underlying conditions. Additionally, we may need to evaluate your eyes and ears for any related conditions, such as ophthalmologic and otologic diagnosis and treatment."
10 | }
11 | ```
12 | 
13 | - instruction: the system role
14 | - input: the paper abstract
15 | - output: the paper title
16 | 
17 | - Crawl target: [https://arxiv.org/](https://arxiv.org/)
--------------------------------------------------------------------------------
/docs/Command/alpaca-lora.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ```bash
 3 | # Clone the project
 4 | git clone https://github.com/tloen/alpaca-lora.git
 5 | 
 6 | # Install dependencies
 7 | pip install -r requirements.txt
 8 | 
 9 | # Convert the LLaMA weights to Hugging Face format
10 | python src/transformers/models/llama/convert_llama_weights_to_hf.py \
11 |     --input_dir ../model/ \
12 |     --model_size 7B \
13 |     --output_dir ../model/7B-hf
14 | 
15 | # Official single-GPU training
16 | python finetune.py \
17 |     --base_model '../model/7B-hf' \
18 |     --data_path '../train.json' \
19 |     --output_dir '../alpaca-lora-output'
20 | 
21 | # Single-node multi-GPU (4x A100) training
22 | WORLD_SIZE=4 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master_port=3192 finetune.py \
23 |     --base_model '../model/7B-hf' \
24 |     --data_path '../train.json' \
25 |     --output_dir '../alpaca-lora-output' \
26 |     --batch_size 1024 \
27 |     --micro_batch_size 128 \
28 |     --num_epochs 3
29 | 
30 | # alpaca-lora-new
31 | WORLD_SIZE=4 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master_port=3192 finetune.py \
32 |     --base_model='../model/7B-hf' \
33 |     --data_path '../train1.json' \
34 |     --num_epochs=3 \
35 |     --cutoff_len=512 \
36 |     --group_by_length \
37 |     --output_dir='../alpaca-lora-new-output' \
38 |     --lora_target_modules='[q_proj,k_proj,v_proj,o_proj]' \
39 |     --lora_r=16 \
40 |     --micro_batch_size=16
41 | 
42 | # Inference
43 | python generate.py \
44 |     --load_8bit \
45 |     --base_model '../model/7B-hf' \
46 |     --lora_weights '../alpaca-lora-output'
47 | ```
48 | 
--------------------------------------------------------------------------------
/docs/Command/standford-alpaca.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ```bash
 3 | # Clone the project
 4 | git clone https://github.com/tatsu-lab/stanford_alpaca.git
 5 | 
 6 | # Installation
 7 | # Option 1
 8 | pip install git+https://github.com/huggingface/transformers@0041be5b3d1b9a5e1443e1825d7d80f6dfadcdaa
 9 | # Option 2
10 | git clone https://github.com/huggingface/transformers.git
11 | cd transformers
12 | git checkout 0041be5
13 | pip install .
14 | 
15 | # Clone the conversion repo
16 | git clone https://github.com/huggingface/transformers.git
17 | 
18 | # Install one extra library
19 | python -m pip install accelerate
20 | 
21 | # Convert the model
22 | python src/transformers/models/llama/convert_llama_weights_to_hf.py \
23 |     --input_dir ../model/ \
24 |     --model_size 7B \
25 |     --output_dir ../model/7B-hf
26 | 
27 | # Train the model
28 | torchrun --nproc_per_node=4 --master_port=11223 train.py \
29 |     --model_name_or_path ../model/7B-hf \
30 |     --data_path ../train.json \
31 |     --bf16 True \
32 |     --output_dir ../stanford_alpaca_output \
33 |     --num_train_epochs 3 \
34 |     --per_device_train_batch_size 4 \
35 |     --per_device_eval_batch_size 4 \
36 |     --gradient_accumulation_steps 8 \
37 |     --evaluation_strategy "no" \
38 |     --save_strategy "steps" \
39 |     --save_steps 2000 \
40 |     --save_total_limit 1 \
41 |     --learning_rate 2e-5 \
42 |     --weight_decay 0. \
43 |     --warmup_ratio 0.03 \
44 |     --lr_scheduler_type "cosine" \
45 |     --logging_steps 1 \
46 |     --fsdp "full_shard auto_wrap" \
47 |     --fsdp_transformer_layer_cls_to_wrap 'LLaMADecoderLayer' \
48 |     --tf32 True
49 | ```
50 | 
51 | 1. [Reference 1: Stanford Alpaca fine-tuning notes](https://zhuanlan.zhihu.com/p/616119919)
52 | 2. [Reference 2: stanford_alpaca/issues](https://github.com/tatsu-lab/stanford_alpaca/issues)
--------------------------------------------------------------------------------
/docs/Command/test.txt:
--------------------------------------------------------------------------------
 1 | If you are an expert in writing papers, please generate a good paper title for this paper based on other authors' descriptions of their abstracts.
 2 | 
 3 | Please generate a suitable paper title from the following paper abstract:
 4 | 
 5 | Focus-RCNet: A lightweight recyclable waste classification algorithm based on Focus and knowledge distillation
 6 | Focus-RCNet: A Lightweight Convolutional Neural Network for Recyclable Waste Image Classification
 7 | Focus-RCNet: A lightweight deep learning model for automated waste classification with enhanced recyclable waste image feature recognition
 8 | Efficient Waste Classification with Focus-RCNet: A Lightweight Deep Learning Architecture Employing Sandglass Structure, SimAM Attention Mechanism, and Knowledge Distillation for Real-Time Embedded Applications
 9 | 
10 | Waste pollution is one of the most important environmental problems in the modern world. With the continuous improvement of the living standard of the population and the increasing richness of the consumption structure, the amount of domestic waste generated has increased dramatically and there is an urgent need for further waste treatment of waste. The rapid development of artificial intelligence provides an effective solution for automated waste classification. However, the large computational power and high complexity of algorithms make convolutional neural networks (CNNs) unsuitable for real-time embedded applications. In this paper, we propose a lightweight network architecture, Focus-RCNet, designed with reference to the sandglass structure of MobileNetV2, which uses deeply separable convolution to extract features from images. The Focus module is introduced into the field of recyclable waste image classification to reduce the dimensionality of features while retaining relevant information. In order to make the model focus more on waste image features while keeping the amount of parameters computationally small, we introduce the SimAM attention mechanism. Additionally, knowledge distillation is used to further compress the number of parameters in the model.
By training and testing on the TrashNet dataset, the Focus-RCNet model not only achieves an accuracy of 92%, but also has high mobility of deployment. 11 | 12 | ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices 13 | ShuffleNet: Efficient Convolutional Neural Networks for Mobile Devices 14 | ShuffleNet: A Computation-Efficient CNN Architecture for Mobile Devices with Superior Performance in Object Detection and ImageNet Classification while Maintaining Accuracy 15 | ShuffleNet: A Computationally Efficient CNN Architecture for Mobile Devices with Enhanced Performance in ImageNet Classification and MS COCO Object Detection 16 | 17 | We introduce an extremely computation-efficient CNN architecture named ShuffleNet, which is designed specially for mobile devices with very limited computing power (e.g., 10-150 MFLOPs). The new architecture utilizes two new operations, pointwise group convolution and channel shuffle, to greatly reduce computation cost while maintaining accuracy. Experiments on ImageNet classification and MS COCO object detection demonstrate the superior performance of ShuffleNet over other structures, e.g. lower top-1 error (absolute 7.8%) than recent MobileNet on ImageNet classification task, under the computation budget of 40 MFLOPs. On an ARM-based mobile device, ShuffleNet achieves ~13x actual speedup over AlexNet while maintaining comparable accuracy. 18 | 19 | Segment Anything 20 | Segment Anything 21 | Segment Anything: Introducing a New Task, Model, and Dataset for Promptable Image Segmentation with Superior Zero-Shot Performance 22 | Exploring the Segment Anything Project: A Promptable Image Segmentation Model and Extensive Dataset with Impressive Zero-Shot Performance 23 | 24 | We introduce the Segment Anything (SA) project: a new task, model, and dataset for image segmentation. Using our efficient model in a data collection loop, we built the largest segmentation dataset to date (by far), with over 1 billion masks on 11M licensed and privacy respecting images. The model is designed and trained to be promptable, so it can transfer zero-shot to new image distributions and tasks. We evaluate its capabilities on numerous tasks and find that its zero-shot performance is impressive -- often competitive with or even superior to prior fully supervised results. We are releasing the Segment Anything Model (SAM) and corresponding dataset (SA-1B) of 1B masks and 11M images. 25 | -------------------------------------------------------------------------------- /docs/Command/命令.txt: -------------------------------------------------------------------------------- 1 | cd /mnt/c/Users/60332/Desktop 2 | 3 | \\wsl$ 4 | 5 | rsync -aczP ./finetune_multidata.py a100:/tmp/test/alpaca-lora/ 6 | 7 | rsync -aczP ./cs.json a100:/tmp/test/ 8 | 9 | rsync -avzP a100:/tmp/test/7b-cs-6.zip /mnt/c/Users/60332/Desktop 10 | 11 | rsync -avzP a100:/tmp/test/model/7b.zip /mnt/c/Users/60332/Desktop 12 | 13 | history -c 14 | 15 | ssh -L 8888:127.0.0.1:端口 a100 16 | 17 | If you are an expert in writing papers, please generate a good paper title for this paper based on other authors' descriptions of their abstracts. 18 | 19 | You should also tell me which words in the abstract influenced the creation of this paper title. 20 | 21 | Waste pollution is one of the most important environmental problems in the modern world. 
With the continuous improvement of the living standard of the population and the increasing richness of the consumption structure, the amount of domestic waste generated has increased dramatically and there is an urgent need for further waste treatment of waste. The rapid development of artificial intelligence provides an effective solution for automated waste classification. However, the large computational power and high complexity of algorithms make convolutional neural networks (CNNs) unsuitable for real-time embedded applications. In this paper, we propose a lightweight network architecture, Focus-RCNet, designed with reference to the sandglass structure of MobileNetV2, which uses deeply separable convolution to extract features from images. The Focus module is introduced into the field of recyclable waste image classification to reduce the dimensionality of features while retaining relevant information. In order to make the model focus more on waste image features while keeping the amount of parameters computationally small, we introduce the SimAM attention mechanism. Additionally, knowledge distillation is used to further compress the number of parameters in the model. By training and testing on the TrashNet dataset, the Focus-RCNet model not only achieves an accuracy of 92%, but also has high mobility of deployment. 22 | 23 | python src/transformers/models/llama/convert_llama_weights_to_hf.py \ 24 | --input_dir ../model/ \ 25 | --model_size 13B \ 26 | --output_dir ../model/13B-hf 27 | 28 | WORLD_SIZE=4 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master_port=3192 finetune.py \ 29 | --base_model '../model/13B-hf' \ 30 | --data_path '../train100-1.json' \ 31 | --output_dir '../alpaca-lora-100-output' \ 32 | --batch_size 512 \ 33 | --micro_batch_size 16 34 | 35 | python finetune.py \ 36 | --base_model='../model/7B-hf' \ 37 | --data_path '../train1.json' \ 38 | --num_epochs=3 \ 39 | --cutoff_len=512 \ 40 | --group_by_length \ 41 | --output_dir='../alpaca-lora-new-output' \ 42 | --lora_target_modules='[q_proj,k_proj,v_proj,o_proj]' \ 43 | --lora_r=16 \ 44 | --micro_batch_size=16 45 | 46 | WORLD_SIZE=4 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master_port=3192 finetune.py \ 47 | --base_model='../model/13B-hf' \ 48 | --data_path '../cs.json' \ 49 | --num_epochs=3 \ 50 | --cutoff_len=512 \ 51 | --group_by_length \ 52 | --output_dir='../alpaca-lora-cs-13-output' \ 53 | --lora_target_modules='[q_proj,k_proj,v_proj,o_proj]' \ 54 | --lora_r=16 \ 55 | --micro_batch_size=16 56 | 57 | python generate.py \ 58 | --load_8bit \ 59 | --base_model '../model/7B-hf' \ 60 | --lora_weights '../alpaca-lora-cs-13-output' -------------------------------------------------------------------------------- /docs/images/13B-100-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangRongsheng/ChatGenTitle/799c25ca3c0c4c8f180714d0a157a143d9b7e083/docs/images/13B-100-3.png -------------------------------------------------------------------------------- /docs/images/7b-50-3-new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangRongsheng/ChatGenTitle/799c25ca3c0c4c8f180714d0a157a143d9b7e083/docs/images/7b-50-3-new.png -------------------------------------------------------------------------------- /docs/images/7b-50-3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WangRongsheng/ChatGenTitle/799c25ca3c0c4c8f180714d0a157a143d9b7e083/docs/images/7b-50-3.png -------------------------------------------------------------------------------- /docs/images/7b-cs-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangRongsheng/ChatGenTitle/799c25ca3c0c4c8f180714d0a157a143d9b7e083/docs/images/7b-cs-3.png -------------------------------------------------------------------------------- /docs/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangRongsheng/ChatGenTitle/799c25ca3c0c4c8f180714d0a157a143d9b7e083/docs/images/logo.png -------------------------------------------------------------------------------- /docs/images/usage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangRongsheng/ChatGenTitle/799c25ca3c0c4c8f180714d0a157a143d9b7e083/docs/images/usage.png -------------------------------------------------------------------------------- /docs/introduction/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangRongsheng/ChatGenTitle/799c25ca3c0c4c8f180714d0a157a143d9b7e083/docs/introduction/1.png -------------------------------------------------------------------------------- /docs/introduction/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangRongsheng/ChatGenTitle/799c25ca3c0c4c8f180714d0a157a143d9b7e083/docs/introduction/2.png -------------------------------------------------------------------------------- /docs/introduction/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangRongsheng/ChatGenTitle/799c25ca3c0c4c8f180714d0a157a143d9b7e083/docs/introduction/3.png -------------------------------------------------------------------------------- /docs/introduction/介绍.md: -------------------------------------------------------------------------------- 1 | ![](1.png) 2 | 3 | **Project name**: ChatGenTitle: a paper-title generation model fine-tuned on the LLaMA model with metadata from millions of arXiv papers 4 | 5 | **Source repository**: https://github.com/WangRongsheng/ChatGenTitle 6 | 7 | ## Project background 8 | 9 | In academic paper writing, producing a title that is both appealing and accurate requires weighing many factors at once, and it is a significant challenge that authors face. The main difficulties in generating a paper title are: 10 | 11 | 1. Concise yet accurate: a good title should be brief and polished while still accurately reflecting the focus and core of the research, which is a huge challenge for authors. 12 | 2. Distinctive yet easy to understand: the title should be distinctive enough to attract readers' interest, yet easy to understand, avoiding wording that is too generic or too convoluted and obscure. 13 | 3. Reflecting the contribution: a good title should clearly convey the contribution of the research and highlight its novelty, making the value of the work obvious to readers. 14 | 4. Avoiding clichés: certain words and phrases are so overused that they make a title look stale and unoriginal, or even meaningless. 15 | 16 | Recently, large language models (LLMs) represented by ChatGPT and GPT-4 have set off a new wave of research in natural language processing, demonstrating capabilities resembling artificial general intelligence (AGI) and drawing wide attention from industry. Alongside this work, many researchers have looked at low-cost ways to build a personal "ChatGPT", such as stanford_alpaca[1] and alpaca-lora[2]. These projects focus on fine-tuning large models; we are more interested in putting large models to work on downstream tasks. 17 | 18 | To that end, we turned to the task of paper-title generation. arXiv (The arXiv.org e-Print archive) is a free, open scholarly preprint community created and maintained by Cornell University, founded in 1991. It hosts electronic preprints and conference papers in mathematics, physics and other disciplines, containing a wealth of high-quality papers and research reports with ever-broader coverage, along with rich paper metadata. From the paper information openly available on arXiv, we built a database of metadata for 2.2 million papers. After data cleaning, these records were turned into data pairs suitable for fine-tuning large models (a minimal sketch of this conversion is given after the field list below). 19 | 20 | Bringing this paper metadata into large-model fine-tuning has a positive effect on the difficulties involved in title generation. It helps in the following ways: 21 | 22 | 1. A more accurate and broader language model: large models are trained on vast amounts of data, so their language modeling interprets natural language more accurately, copes with more linguistic situations, and improves the expressiveness of generated titles. 23 | 2. More precise semantic understanding: using deep learning, large models build high-dimensional vector representations of language, providing more accurate semantic understanding and helping generate more precise, accurate titles. 24 | 3. Greater creativity and novelty: trained on large corpora, large models can extract patterns from the data and offer more combinations of words and sentences, making generated titles more creative and original. 25 | 4. Higher efficiency: compared with writing titles by hand, using a large model to generate them saves a great deal of time and is less likely to produce obvious mistakes, improving the quality of the output. 26 | 27 | In short, introducing large models provides real help with the difficulties of title generation and promises to strengthen the abilities to analyze, abstract and innovate. 28 | 
29 | ## About the arXiv dataset 30 | 31 | The paper metadata we collected covers all subject classifications, such as: 32 | 33 | 1. Computer Science 34 | 2. Mathematics 35 | 3. Physics 36 | 4. Statistics 37 | 5. Electrical Engineering and Systems Science 38 | 6. Economics 39 | 7. Quantum Physics 40 | 8. Materials Science 41 | 9. Biology 42 | 10. Quantitative Finance 43 | 11. Information Science 44 | 12. Interdisciplinary 45 | 46 | Each top-level category contains many specific subcategories; for example, Computer Science includes subcategories such as computer vision, machine learning, artificial intelligence and computer networks. If you want to find papers in a particular field, you can browse by these classifications. 47 | 48 | Each paper carries metadata with the following fields: 49 | ```json 50 | { 51 | "id": "0704.0001", 52 | "submitter": "Pavel Nadolsky", 53 | "authors": "C. Bal\'azs, E. L. Berger, P. M. Nadolsky, C.-P. Yuan", 54 | "title": "Calculation of prompt diphoton production cross sections at Tevatron and LHC energies", 55 | "comments": "37 pages, 15 figures; published version", 56 | "journal-ref": "Phys.Rev.D76:013009,2007", 57 | "doi": "10.1103/PhysRevD.76.013009", 58 | "report-no": "ANL-HEP-PR-07-12", 59 | "categories": "hep-ph", 60 | "license": null, 61 | "abstract": " A fully differential calculation in perturbative quantum chromodynamics is presented for the production of massive photon pairs at hadron colliders. All next-to-leading order perturbative contributions from quark-antiquark, gluon-(anti)quark, and gluon-gluon subprocesses are included, as well as all-orders resummation of initial-state gluon radiation valid at next-to-next-to-leading logarithmic accuracy. The region of phase space is specified in which the calculation is most reliable. Good agreement is demonstrated with data from the Fermilab Tevatron, and predictions are made for more detailed tests with CDF and DO data. Predictions are shown for distributions of diphoton pairs produced at the energy of the Large Hadron Collider (LHC). Distributions of the diphoton pairs from the decay of a Higgs boson are contrasted with those produced from QCD processes at the LHC, showing that enhanced sensitivity to the signal can be obtained with judicious selection of events. ", 62 | "versions": 63 | } 64 | ``` 65 | 66 | - id: ArXiv ID (can be used to access the paper, see below) 67 | - submitter: Who submitted the paper 68 | - authors: Authors of the paper 69 | - title: Title of the paper 70 | - comments: Additional info, such as number of pages and figures 71 | - journal-ref: Information about the journal the paper was published in 72 | - doi: [Digital Object Identifier](https://www.doi.org) 73 | - abstract: The abstract of the paper 74 | - categories: Categories / tags in the ArXiv system 75 | - versions: A version history
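
As mentioned in the project background, these records are cleaned and converted into data pairs for fine-tuning. Below is a minimal sketch of that conversion, assuming the metadata is stored locally with one JSON record per line; the input file name is only a placeholder, the fixed instruction string is the one used throughout this repository, and the cleaning steps borrow from get_arxiv_multiprocessing.py:

```python
import json
import re

INSTRUCTION = (
    "If you are an expert in writing papers, please generate a good paper title "
    "for this paper based on other authors' descriptions of their abstracts."
)

def to_training_pair(record: dict) -> dict:
    # Collapse the hard-wrapped abstract into one line and drop stray URLs,
    # mirroring the cleaning done in get_arxiv_multiprocessing.py.
    abstract = re.sub(r"\s+", " ", record["abstract"]).strip()
    abstract = re.sub(r"http\S+", "", abstract)
    title = re.sub(r"\s+", " ", record["title"]).strip()
    return {"instruction": INSTRUCTION, "input": abstract, "output": title}

# Stream the metadata dump (placeholder file name) into a fine-tuning dataset.
pairs = []
with open("arxiv-metadata-snapshot.json", encoding="utf-8") as f:
    for line in f:
        pairs.append(to_training_pair(json.loads(line)))

with open("train.json", "w", encoding="utf-8") as f:
    json.dump(pairs, f)
```

The resulting train.json has the shape expected by the `--data_path` argument of finetune.py.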
", 62 | "versions": 63 | } 64 | ``` 65 | 66 | - id: ArXiv ID (can be used to access the paper, see below) 67 | - submitter: Who submitted the paper 68 | - authors: Authors of the paper 69 | - title: Title of the paper 70 | - comments: Additional info, such as number of pages and figures 71 | - journal-ref: Information about the journal the paper was published in 72 | - doi: [https://www.doi.org](Digital Object Identifier) 73 | - abstract: The abstract of the paper 74 | - categories: Categories / tags in the ArXiv system 75 | - versions: A version history 76 | 77 | ## LLMs微调 78 | 79 | ChatGenTitle基于Meta的LLaMA模型进行微调,微调主流的方法有:Instruct微调和LoRa微调。 80 | 81 | Instruct微调和LoRa微调是两种不同的技术。Instruct微调是指在深度神经网络训练过程中调整模型参数的过程,以优化模型的性能。在微调过程中,使用一个预先训练好的模型作为基础模型,然后在新的数据集上对该模型进行微调。Instruct微调是一种通过更新预训练模型的所有参数来完成的微调方法,通过微调使其适用于多个下游应用。LoRa微调则是指对低功耗广域网(LoRaWAN)中的LoRa节点参数进行微调的过程,以提高节点的传输效率。在LoRa微调中,需要了解节点的硬件和网络部署情况,并通过对节点参数进行微小调整来优化传输效率。与Instruct微调相比,LoRA在每个Transformer块中注入可训练层,因为不需要为大多数模型权重计算梯度,大大减少了需要训练参数的数量并且降低了GPU内存的要求。 研究发现,使用LoRA进行的微调质量与全模型微调相当,速度更快并且需要更少的计算。因此,如果有低延迟和低内存需求的情况,建议使用LoRA微调。 82 | 83 | 因此我们选择使用LoRA微调构建整个ChatGenTitle。 84 | 85 | ```python 86 | # 下载项目 87 | git clone https://github.com/tloen/alpaca-lora.git 88 | 89 | # 安装依赖 90 | pip install -r requirements.txt 91 | 92 | # 转化模型 93 | python src/transformers/models/llama/convert_llama_weights_to_hf.py \ 94 | --input_dir ../model/ \ 95 | --model_size 7B \ 96 | --output_dir ../model/7B-hf 97 | 98 | # 单机单卡训练模型 99 | python finetune.py \ 100 | --base_model '../model/7B-hf' \ 101 | --data_path '../train.json' \ 102 | --output_dir '../alpaca-lora-output' 103 | 104 | # 单机多卡(4*A100)训练模型 105 | WORLD_SIZE=4 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master_port=3192 finetune.py \ 106 | --base_model '../model/7B-hf' \ 107 | --data_path '../train.json' \ 108 | --output_dir '../alpaca-lora-output' \ 109 | --batch_size 1024 \ 110 | --micro_batch_size 128 \ 111 | --num_epochs 3 112 | ``` 113 | 114 | ## 在线访问 115 | 116 | 在开始部署使用之前,我们需要知道两个模型的定义。整个项目会有LLaMA和LoRA两种模型,LoRA模型是我们微调产生保存的权重,LLaMA 权重则是由Meta公司开源的大模型预训练权重。我们可以将生成的LoRA权重认为是一个原来LLaMA模型的补丁权重。因此我们要同时加载两种不同模型。目前我们已经提供的LoRA模型有: 117 | 118 | |模型名称|微调数据|微调基准模型|模型大小|微调时长| 119 | |:-|:-|:-|:-|:-| 120 | |LLaMa-Lora-7B-3|arXiv-50-all|LLaMa-7B|148.1MB|9 hours| 121 | |LLaMa-Lora-7B-3-new |arXiv-50-all|LLaMa-7B|586MB|12.5 hours| 122 | |LLaMa-Lora-13B-3|arXiv-100-all|LLaMa-13B|230.05MB|26 hours| 123 | 124 | > 更多模型将会很快发布! 125 | 126 | 准备好需要的两种权重,就可以开启使用: 127 | ```python 128 | # 推理 129 | python generate.py \ 130 | --load_8bit \ 131 | --base_model '../model/7B-hf' \ 132 | --lora_weights '../alpaca-lora-output' 133 | ``` 134 | 135 | 当模型运行以后,访问`127.0.0.1:7860`即可。 136 | 137 | ![](2.png) 138 | 139 | 然后在`Instruction`中输入: 140 | ```python 141 | If you are an expert in writing papers, please generate a good paper title for this paper based on other authors' descriptions of their abstracts. 142 | ``` 143 | 144 | 在`Input`中输入: 145 | ```python 146 | <你论文的摘要>:Waste pollution is one of the most important environmental problems in the modern world. With the continuous improvement of the living standard of the population and the increasing richness of the consumption structure, the amount of domestic waste generated has increased dramatically and there is an urgent need for further waste treatment of waste. The rapid development of artificial intelligence provides an effective solution for automated waste classification. 
85 | ```bash 86 | # Clone the project 87 | git clone https://github.com/tloen/alpaca-lora.git 88 | 89 | # Install the dependencies 90 | pip install -r requirements.txt 91 | 92 | # Convert the model weights 93 | python src/transformers/models/llama/convert_llama_weights_to_hf.py \ 94 | --input_dir ../model/ \ 95 | --model_size 7B \ 96 | --output_dir ../model/7B-hf 97 | 98 | # Train on a single machine with a single GPU 99 | python finetune.py \ 100 | --base_model '../model/7B-hf' \ 101 | --data_path '../train.json' \ 102 | --output_dir '../alpaca-lora-output' 103 | 104 | # Train on a single machine with multiple GPUs (4x A100) 105 | WORLD_SIZE=4 CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master_port=3192 finetune.py \ 106 | --base_model '../model/7B-hf' \ 107 | --data_path '../train.json' \ 108 | --output_dir '../alpaca-lora-output' \ 109 | --batch_size 1024 \ 110 | --micro_batch_size 128 \ 111 | --num_epochs 3 112 | ``` 113 | 114 | ## Online access 115 | 116 | Before deploying and using the model, two kinds of weights need to be distinguished. The project involves both LLaMA and LoRA models: the LoRA weights are what our fine-tuning produces and saves, while the LLaMA weights are the pretrained large-model weights open-sourced by Meta. The LoRA weights can be thought of as a patch on top of the original LLaMA model, so the two must be loaded together. The LoRA models we currently provide are: 117 | 118 | |Model|Fine-tuning data|Base model|Model size|Training time| 119 | |:-|:-|:-|:-|:-| 120 | |LLaMa-Lora-7B-3|arXiv-50-all|LLaMa-7B|148.1MB|9 hours| 121 | |LLaMa-Lora-7B-3-new |arXiv-50-all|LLaMa-7B|586MB|12.5 hours| 122 | |LLaMa-Lora-13B-3|arXiv-100-all|LLaMa-13B|230.05MB|26 hours| 123 | 124 | > More models will be released soon! 125 | 126 | With both sets of weights in place, you can start using the model: 127 | ```bash 128 | # Inference 129 | python generate.py \ 130 | --load_8bit \ 131 | --base_model '../model/7B-hf' \ 132 | --lora_weights '../alpaca-lora-output' 133 | ``` 134 | 135 | Once the model is running, open `127.0.0.1:7860`. 136 | 137 | ![](2.png) 138 | 139 | Then enter the following in `Instruction`: 140 | ``` 141 | If you are an expert in writing papers, please generate a good paper title for this paper based on other authors' descriptions of their abstracts. 142 | ``` 143 | 144 | And enter your paper's abstract in `Input`: 145 | ``` 146 | <your paper's abstract>: Waste pollution is one of the most important environmental problems in the modern world. With the continuous improvement of the living standard of the population and the increasing richness of the consumption structure, the amount of domestic waste generated has increased dramatically and there is an urgent need for further waste treatment of waste. The rapid development of artificial intelligence provides an effective solution for automated waste classification. However, the large computational power and high complexity of algorithms make convolutional neural networks (CNNs) unsuitable for real-time embedded applications. In this paper, we propose a lightweight network architecture, Focus-RCNet, designed with reference to the sandglass structure of MobileNetV2, which uses deeply separable convolution to extract features from images. The Focus module is introduced into the field of recyclable waste image classification to reduce the dimensionality of features while retaining relevant information. In order to make the model focus more on waste image features while keeping the amount of parameters computationally small, we introduce the SimAM attention mechanism. Additionally, knowledge distillation is used to further compress the number of parameters in the model. By training and testing on the TrashNet dataset, the Focus-RCNet model not only achieves an accuracy of 92%, but also has high mobility of deployment. 147 | ``` 148 | 149 | Click `Submit` and wait! 150 | 151 | ![](3.png) 152 | 153 | The `Output` field is the paper title ChatGenTitle generated for you. 154 | 155 | ## References 156 | 157 | [1] https://github.com/tatsu-lab/stanford_alpaca 158 | [2] https://github.com/tloen/alpaca-lora -------------------------------------------------------------------------------- /docs/notebook/chatgentitle_inference_in_colab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "machine_shape": "hm" 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | }, 16 | "accelerator": "GPU", 17 | "gpuClass": "standard" 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "source": [ 23 | "# Check GPU's Memory Capacity\n", 24 | "\n", 25 | "By running `nvidia-smi` command, you can find out the GPU's memory capacity on the current system. \n", 26 | "\n", 27 | "With the standard GPU instance(___T4___) which is free, you can run 7B and 13B models. With the premium GPU instance(___A100 40GB___) which is paid with the compute unit that you own, you can even run 30B model! Choose the instance at the menu `Runtime` -> `Change runtime type` -> `Hardware accelerator (GPU)` -> `GPU class (Standard or Premium)`" 28 | ], 29 | "metadata": { 30 | "id": "xf3pUNyVO3WS" 31 | } 32 | }, 33 | { 34 | "cell_type": "code", 35 | "source": [ 36 | "!nvidia-smi" 37 | ], 38 | "metadata": { 39 | "id": "L2MoM27rfaKK", 40 | "colab": { 41 | "base_uri": "https://localhost:8080/" 42 | }, 43 | "outputId": "37daa644-f021-4a55-c007-86eb96ccdf9b" 44 | }, 45 | "execution_count": null, 46 | "outputs": [ 47 | { 48 | "output_type": "stream", 49 | "name": "stdout", 50 | "text": [ 51 | "Sun Apr 9 17:17:36 2023 \n", 52 | "+-----------------------------------------------------------------------------+\n", 53 | "| NVIDIA-SMI 525.85.12 Driver Version: 525.85.12 CUDA Version: 12.0 |\n", 54 | "|-------------------------------+----------------------+----------------------+\n", 55 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 56 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 57 | "| | | MIG M. 
|\n", 58 | "|===============================+======================+======================|\n", 59 | "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", 60 | "| N/A 49C P8 10W / 70W | 0MiB / 15360MiB | 0% Default |\n", 61 | "| | | N/A |\n", 62 | "+-------------------------------+----------------------+----------------------+\n", 63 | " \n", 64 | "+-----------------------------------------------------------------------------+\n", 65 | "| Processes: |\n", 66 | "| GPU GI CI PID Type Process name GPU Memory |\n", 67 | "| ID ID Usage |\n", 68 | "|=============================================================================|\n", 69 | "| No running processes found |\n", 70 | "+-----------------------------------------------------------------------------+\n" 71 | ] 72 | } 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "source": [ 78 | "# Clone the repository" 79 | ], 80 | "metadata": { 81 | "id": "N0MDD9TuPTfJ" 82 | } 83 | }, 84 | { 85 | "cell_type": "code", 86 | "source": [ 87 | "!git clone --branch alpaca-lora-new https://github.com/WangRongsheng/ChatGenTitle.git" 88 | ], 89 | "metadata": { 90 | "id": "a_i5DKBNnzAK", 91 | "colab": { 92 | "base_uri": "https://localhost:8080/" 93 | }, 94 | "outputId": "9612bb9a-997a-4267-810a-99af621092a3" 95 | }, 96 | "execution_count": null, 97 | "outputs": [ 98 | { 99 | "output_type": "stream", 100 | "name": "stdout", 101 | "text": [ 102 | "Cloning into 'ChatGenTitle'...\n", 103 | "remote: Enumerating objects: 478, done.\u001b[K\n", 104 | "remote: Counting objects: 100% (189/189), done.\u001b[K\n", 105 | "remote: Compressing objects: 100% (136/136), done.\u001b[K\n", 106 | "remote: Total 478 (delta 125), reused 60 (delta 43), pack-reused 289\u001b[K\n", 107 | "Receiving objects: 100% (478/478), 2.80 MiB | 16.04 MiB/s, done.\n", 108 | "Resolving deltas: 100% (219/219), done.\n" 109 | ] 110 | } 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "source": [ 116 | "%cd ChatGenTitle" 117 | ], 118 | "metadata": { 119 | "colab": { 120 | "base_uri": "https://localhost:8080/" 121 | }, 122 | "id": "Brj2nl57B1Zm", 123 | "outputId": "c5ac3e0a-6c2a-431a-ba4e-e6ceeccdb4d9" 124 | }, 125 | "execution_count": null, 126 | "outputs": [ 127 | { 128 | "output_type": "stream", 129 | "name": "stdout", 130 | "text": [ 131 | "/content/ChatGenTitle\n" 132 | ] 133 | } 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "source": [ 139 | "# Move into the directory of the cloned repository" 140 | ], 141 | "metadata": { 142 | "id": "HUuzxWGuPYLq" 143 | } 144 | }, 145 | { 146 | "cell_type": "code", 147 | "source": [ 148 | "ls" 149 | ], 150 | "metadata": { 151 | "id": "wR-M8u7gsQqg", 152 | "colab": { 153 | "base_uri": "https://localhost:8080/" 154 | }, 155 | "outputId": "503851a2-2c21-463d-e6fc-bf949c9fea62" 156 | }, 157 | "execution_count": null, 158 | "outputs": [ 159 | { 160 | "output_type": "stream", 161 | "name": "stdout", 162 | "text": [ 163 | "\u001b[0m\u001b[01;34mdata\u001b[0m/ generate.py requirements.txt\n", 164 | "\u001b[01;34mdocs\u001b[0m/ get_arxiv_multiprocessing.py \u001b[01;34mtemplates\u001b[0m/\n", 165 | "export_hf_checkpoint.py LICENSE \u001b[01;34mutils\u001b[0m/\n", 166 | "export_state_dict_checkpoint.py README.md\n", 167 | "finetune.py requirements-all.txt\n" 168 | ] 169 | } 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "source": [ 175 | "# Install dependencies" 176 | ], 177 | "metadata": { 178 | "id": "XG8oy7BBPdMh" 179 | } 180 | }, 181 | { 182 | "cell_type": "code", 183 | "source": [ 184 | "!pip install -r requirements.txt" 
185 | ], 186 | "metadata": { 187 | "id": "moN-15x_ifHE", 188 | "colab": { 189 | "base_uri": "https://localhost:8080/" 190 | }, 191 | "outputId": "384d73cd-8845-4bee-99e7-49f750d69adc" 192 | }, 193 | "execution_count": null, 194 | "outputs": [ 195 | { 196 | "output_type": "stream", 197 | "name": "stdout", 198 | "text": [ 199 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 200 | "Collecting git+https://github.com/huggingface/peft.git (from -r requirements.txt (line 9))\n", 201 | " Cloning https://github.com/huggingface/peft.git to /tmp/pip-req-build-q07mxqux\n", 202 | " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/peft.git /tmp/pip-req-build-q07mxqux\n", 203 | " Resolved https://github.com/huggingface/peft.git to commit 1117d4772109a098787ce7fc297cb6cd641de6eb\n", 204 | " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", 205 | " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", 206 | " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 207 | "Collecting git+https://github.com/huggingface/transformers.git (from -r requirements.txt (line 10))\n", 208 | " Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-sjzym39u\n", 209 | " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-sjzym39u\n", 210 | " Resolved https://github.com/huggingface/transformers.git to commit 656e869a4523f6a0ce90b3aacbb05cc8fb5794bb\n", 211 | " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", 212 | " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", 213 | " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 214 | "Collecting accelerate\n", 215 | " Downloading accelerate-0.18.0-py3-none-any.whl (215 kB)\n", 216 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m215.3/215.3 KB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 217 | "\u001b[?25hRequirement already satisfied: appdirs in /usr/local/lib/python3.9/dist-packages (from -r requirements.txt (line 2)) (1.4.4)\n", 218 | "Collecting loralib\n", 219 | " Downloading loralib-0.1.1-py3-none-any.whl (8.8 kB)\n", 220 | "Collecting bitsandbytes\n", 221 | " Downloading bitsandbytes-0.37.2-py3-none-any.whl (84.2 MB)\n", 222 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.2/84.2 MB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 223 | "\u001b[?25hCollecting black\n", 224 | " Downloading black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n", 225 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m68.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 226 | "\u001b[?25hCollecting datasets\n", 227 | " Downloading datasets-2.11.0-py3-none-any.whl (468 kB)\n", 228 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m468.7/468.7 KB\u001b[0m \u001b[31m42.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 229 | "\u001b[?25hCollecting fire\n", 230 | " Downloading fire-0.5.0.tar.gz (88 kB)\n", 231 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m88.3/88.3 KB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 232 | "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 233 | "Collecting sentencepiece\n", 234 | " Downloading sentencepiece-0.1.97-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", 235 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m59.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 236 | "\u001b[?25hCollecting gradio\n", 237 | " Downloading gradio-3.24.1-py3-none-any.whl (15.7 MB)\n", 238 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m63.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 239 | "\u001b[?25hRequirement already satisfied: torch>=1.4.0 in /usr/local/lib/python3.9/dist-packages (from accelerate->-r requirements.txt (line 1)) (2.0.0+cu118)\n", 240 | "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from accelerate->-r requirements.txt (line 1)) (23.0)\n", 241 | "Requirement already satisfied: psutil in /usr/local/lib/python3.9/dist-packages (from accelerate->-r requirements.txt (line 1)) (5.9.4)\n", 242 | "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from accelerate->-r requirements.txt (line 1)) (1.22.4)\n", 243 | "Requirement already satisfied: pyyaml in /usr/local/lib/python3.9/dist-packages (from accelerate->-r requirements.txt (line 1)) (6.0)\n", 244 | "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.9/dist-packages (from black->-r requirements.txt (line 5)) (8.1.3)\n", 245 | "Collecting mypy-extensions>=0.4.3\n", 246 | " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", 247 | "Collecting pathspec>=0.9.0\n", 248 | " Downloading pathspec-0.11.1-py3-none-any.whl (29 kB)\n", 249 | "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.9/dist-packages (from black->-r requirements.txt (line 5)) (2.0.1)\n", 250 | "Requirement already satisfied: typing-extensions>=3.10.0.0 in /usr/local/lib/python3.9/dist-packages (from black->-r requirements.txt (line 5)) (4.5.0)\n", 251 | "Requirement already satisfied: platformdirs>=2 in /usr/local/lib/python3.9/dist-packages (from black->-r requirements.txt (line 5)) (3.2.0)\n", 252 | "Requirement already satisfied: ipython>=7.8.0 in /usr/local/lib/python3.9/dist-packages (from black->-r requirements.txt (line 5)) (7.34.0)\n", 253 | "Collecting tokenize-rt>=3.2.0\n", 254 | " Downloading tokenize_rt-5.0.0-py2.py3-none-any.whl (5.8 kB)\n", 255 | "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.9/dist-packages (from datasets->-r requirements.txt (line 7)) (2.27.1)\n", 256 | "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.9/dist-packages (from datasets->-r requirements.txt (line 7)) (2023.3.0)\n", 257 | "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.9/dist-packages (from datasets->-r requirements.txt (line 7)) (4.65.0)\n", 258 | "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.9/dist-packages (from datasets->-r requirements.txt (line 7)) (9.0.0)\n", 259 | "Collecting aiohttp\n", 260 | " Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", 261 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m68.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 262 | "\u001b[?25hCollecting dill<0.3.7,>=0.3.0\n", 263 | " Downloading dill-0.3.6-py3-none-any.whl (110 kB)\n", 264 | "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 KB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 265 | "\u001b[?25hCollecting xxhash\n", 266 | " Downloading xxhash-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", 267 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.2/212.2 KB\u001b[0m \u001b[31m25.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 268 | "\u001b[?25hCollecting huggingface-hub<1.0.0,>=0.11.0\n", 269 | " Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)\n", 270 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.1/200.1 KB\u001b[0m \u001b[31m24.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 271 | "\u001b[?25hCollecting multiprocess\n", 272 | " Downloading multiprocess-0.70.14-py39-none-any.whl (132 kB)\n", 273 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.9/132.9 KB\u001b[0m \u001b[31m19.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 274 | "\u001b[?25hCollecting responses<0.19\n", 275 | " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", 276 | "Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (from datasets->-r requirements.txt (line 7)) (1.4.4)\n", 277 | "Requirement already satisfied: six in /usr/local/lib/python3.9/dist-packages (from fire->-r requirements.txt (line 8)) (1.16.0)\n", 278 | "Requirement already satisfied: termcolor in /usr/local/lib/python3.9/dist-packages (from fire->-r requirements.txt (line 8)) (2.2.0)\n", 279 | "Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers==4.28.0.dev0->-r requirements.txt (line 10)) (3.10.7)\n", 280 | "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\n", 281 | " Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", 282 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m97.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 283 | "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers==4.28.0.dev0->-r requirements.txt (line 10)) (2022.10.31)\n", 284 | "Collecting aiofiles\n", 285 | " Downloading aiofiles-23.1.0-py3-none-any.whl (14 kB)\n", 286 | "Collecting gradio-client>=0.0.5\n", 287 | " Downloading gradio_client-0.0.8-py3-none-any.whl (20 kB)\n", 288 | "Collecting orjson\n", 289 | " Downloading orjson-3.8.10-cp39-cp39-manylinux_2_28_x86_64.whl (140 kB)\n", 290 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m140.5/140.5 KB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 291 | "\u001b[?25hCollecting python-multipart\n", 292 | " Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n", 293 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 KB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 294 | "\u001b[?25hCollecting httpx\n", 295 | " Downloading httpx-0.23.3-py3-none-any.whl (71 kB)\n", 296 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 KB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 297 | "\u001b[?25hRequirement already satisfied: pydantic in /usr/local/lib/python3.9/dist-packages (from gradio->-r requirements.txt (line 12)) (1.10.7)\n", 298 | "Requirement already 
satisfied: markdown-it-py[linkify]>=2.0.0 in /usr/local/lib/python3.9/dist-packages (from gradio->-r requirements.txt (line 12)) (2.2.0)\n", 299 | "Collecting semantic-version\n", 300 | " Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", 301 | "Collecting uvicorn\n", 302 | " Downloading uvicorn-0.21.1-py3-none-any.whl (57 kB)\n", 303 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.8/57.8 KB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 304 | "\u001b[?25hRequirement already satisfied: pillow in /usr/local/lib/python3.9/dist-packages (from gradio->-r requirements.txt (line 12)) (8.4.0)\n", 305 | "Collecting websockets>=10.0\n", 306 | " Downloading websockets-11.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", 307 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.5/129.5 KB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 308 | "\u001b[?25hRequirement already satisfied: markupsafe in /usr/local/lib/python3.9/dist-packages (from gradio->-r requirements.txt (line 12)) (2.1.2)\n", 309 | "Collecting mdit-py-plugins<=0.3.3\n", 310 | " Downloading mdit_py_plugins-0.3.3-py3-none-any.whl (50 kB)\n", 311 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.5/50.5 KB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 312 | "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.9/dist-packages (from gradio->-r requirements.txt (line 12)) (3.7.1)\n", 313 | "Collecting ffmpy\n", 314 | " Downloading ffmpy-0.3.0.tar.gz (4.8 kB)\n", 315 | " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 316 | "Requirement already satisfied: altair>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from gradio->-r requirements.txt (line 12)) (4.2.2)\n", 317 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from gradio->-r requirements.txt (line 12)) (3.1.2)\n", 318 | "Collecting fastapi\n", 319 | " Downloading fastapi-0.95.0-py3-none-any.whl (57 kB)\n", 320 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.1/57.1 KB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 321 | "\u001b[?25hCollecting pydub\n", 322 | " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", 323 | "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.9/dist-packages (from altair>=4.2.0->gradio->-r requirements.txt (line 12)) (4.3.3)\n", 324 | "Requirement already satisfied: toolz in /usr/local/lib/python3.9/dist-packages (from altair>=4.2.0->gradio->-r requirements.txt (line 12)) (0.12.0)\n", 325 | "Requirement already satisfied: entrypoints in /usr/local/lib/python3.9/dist-packages (from altair>=4.2.0->gradio->-r requirements.txt (line 12)) (0.4)\n", 326 | "Collecting multidict<7.0,>=4.5\n", 327 | " Downloading multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n", 328 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.2/114.2 KB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 329 | "\u001b[?25hCollecting async-timeout<5.0,>=4.0.0a3\n", 330 | " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", 331 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets->-r requirements.txt (line 7)) 
(22.2.0)\n", 332 | "Collecting yarl<2.0,>=1.0\n", 333 | " Downloading yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)\n", 334 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m264.6/264.6 KB\u001b[0m \u001b[31m32.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 335 | "\u001b[?25hCollecting frozenlist>=1.1.1\n", 336 | " Downloading frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (158 kB)\n", 337 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m158.8/158.8 KB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 338 | "\u001b[?25hRequirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets->-r requirements.txt (line 7)) (2.0.12)\n", 339 | "Collecting aiosignal>=1.1.2\n", 340 | " Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", 341 | "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.9/dist-packages (from ipython>=7.8.0->black->-r requirements.txt (line 5)) (5.7.1)\n", 342 | "Requirement already satisfied: decorator in /usr/local/lib/python3.9/dist-packages (from ipython>=7.8.0->black->-r requirements.txt (line 5)) (4.4.2)\n", 343 | "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.9/dist-packages (from ipython>=7.8.0->black->-r requirements.txt (line 5)) (4.8.0)\n", 344 | "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.9/dist-packages (from ipython>=7.8.0->black->-r requirements.txt (line 5)) (3.0.38)\n", 345 | "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.9/dist-packages (from ipython>=7.8.0->black->-r requirements.txt (line 5)) (67.6.1)\n", 346 | "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.9/dist-packages (from ipython>=7.8.0->black->-r requirements.txt (line 5)) (0.1.6)\n", 347 | "Requirement already satisfied: pickleshare in /usr/local/lib/python3.9/dist-packages (from ipython>=7.8.0->black->-r requirements.txt (line 5)) (0.7.5)\n", 348 | "Requirement already satisfied: pygments in /usr/local/lib/python3.9/dist-packages (from ipython>=7.8.0->black->-r requirements.txt (line 5)) (2.14.0)\n", 349 | "Collecting jedi>=0.16\n", 350 | " Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)\n", 351 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m89.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 352 | "\u001b[?25hRequirement already satisfied: backcall in /usr/local/lib/python3.9/dist-packages (from ipython>=7.8.0->black->-r requirements.txt (line 5)) (0.2.0)\n", 353 | "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.9/dist-packages (from markdown-it-py[linkify]>=2.0.0->gradio->-r requirements.txt (line 12)) (0.1.2)\n", 354 | "Collecting linkify-it-py<3,>=1\n", 355 | " Downloading linkify_it_py-2.0.0-py3-none-any.whl (19 kB)\n", 356 | "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas->datasets->-r requirements.txt (line 7)) (2.8.2)\n", 357 | "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas->datasets->-r requirements.txt (line 7)) (2022.7.1)\n", 358 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 
7)) (3.4)\n", 359 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 7)) (1.26.15)\n", 360 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 7)) (2022.12.7)\n", 361 | "Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from torch>=1.4.0->accelerate->-r requirements.txt (line 1)) (1.11.1)\n", 362 | "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.9/dist-packages (from torch>=1.4.0->accelerate->-r requirements.txt (line 1)) (2.0.0)\n", 363 | "Requirement already satisfied: networkx in /usr/local/lib/python3.9/dist-packages (from torch>=1.4.0->accelerate->-r requirements.txt (line 1)) (3.0)\n", 364 | "Requirement already satisfied: lit in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch>=1.4.0->accelerate->-r requirements.txt (line 1)) (16.0.0)\n", 365 | "Requirement already satisfied: cmake in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch>=1.4.0->accelerate->-r requirements.txt (line 1)) (3.25.2)\n", 366 | "Collecting starlette<0.27.0,>=0.26.1\n", 367 | " Downloading starlette-0.26.1-py3-none-any.whl (66 kB)\n", 368 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.9/66.9 KB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 369 | "\u001b[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.9/dist-packages (from httpx->gradio->-r requirements.txt (line 12)) (1.3.0)\n", 370 | "Collecting httpcore<0.17.0,>=0.15.0\n", 371 | " Downloading httpcore-0.16.3-py3-none-any.whl (69 kB)\n", 372 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.6/69.6 KB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 373 | "\u001b[?25hCollecting rfc3986[idna2008]<2,>=1.3\n", 374 | " Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)\n", 375 | "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio->-r requirements.txt (line 12)) (1.0.7)\n", 376 | "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio->-r requirements.txt (line 12)) (3.0.9)\n", 377 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio->-r requirements.txt (line 12)) (0.11.0)\n", 378 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio->-r requirements.txt (line 12)) (1.4.4)\n", 379 | "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio->-r requirements.txt (line 12)) (4.39.3)\n", 380 | "Requirement already satisfied: importlib-resources>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib->gradio->-r requirements.txt (line 12)) (5.12.0)\n", 381 | "Collecting h11>=0.8\n", 382 | " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", 383 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 KB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 384 | "\u001b[?25hRequirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.9/dist-packages (from httpcore<0.17.0,>=0.15.0->httpx->gradio->-r requirements.txt (line 12)) (3.6.2)\n", 385 | 
"Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.9/dist-packages (from importlib-resources>=3.2.0->matplotlib->gradio->-r requirements.txt (line 12)) (3.15.0)\n", 386 | "Requirement already satisfied: parso<0.9.0,>=0.8.0 in /usr/local/lib/python3.9/dist-packages (from jedi>=0.16->ipython>=7.8.0->black->-r requirements.txt (line 5)) (0.8.3)\n", 387 | "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.9/dist-packages (from jsonschema>=3.0->altair>=4.2.0->gradio->-r requirements.txt (line 12)) (0.19.3)\n", 388 | "Collecting uc-micro-py\n", 389 | " Downloading uc_micro_py-1.0.1-py3-none-any.whl (6.2 kB)\n", 390 | "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.9/dist-packages (from pexpect>4.3->ipython>=7.8.0->black->-r requirements.txt (line 5)) (0.7.0)\n", 391 | "Requirement already satisfied: wcwidth in /usr/local/lib/python3.9/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=7.8.0->black->-r requirements.txt (line 5)) (0.2.6)\n", 392 | "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->torch>=1.4.0->accelerate->-r requirements.txt (line 1)) (1.3.0)\n", 393 | "Building wheels for collected packages: fire, peft, transformers, ffmpy\n", 394 | " Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 395 | " Created wheel for fire: filename=fire-0.5.0-py2.py3-none-any.whl size=116952 sha256=d9966a17cd65b64d2bf2858a0d2c1827917d137ac645ede196a70eba4223d175\n", 396 | " Stored in directory: /root/.cache/pip/wheels/f7/f1/89/b9ea2bf8f80ec027a88fef1d354b3816b4d3d29530988972f6\n", 397 | " Building wheel for peft (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 398 | " Created wheel for peft: filename=peft-0.3.0.dev0-py3-none-any.whl size=49855 sha256=32163ee65809f051ba3e1fda9ee3b6f3f779a97ef9a3acac4991b2580b9379d6\n", 399 | " Stored in directory: /tmp/pip-ephem-wheel-cache-8ty6xkkw/wheels/2d/60/1b/0edd9dc0f0c489738b1166bc1b0b560ee368f7721f89d06e3a\n", 400 | " Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 401 | " Created wheel for transformers: filename=transformers-4.28.0.dev0-py3-none-any.whl size=6895957 sha256=c82892d2583be352cbe4d5f1aa2e4d88d6ca0363239a644baf792edd944cd09c\n", 402 | " Stored in directory: /tmp/pip-ephem-wheel-cache-8ty6xkkw/wheels/f7/92/8c/752ff3bfcd3439805d8bbf641614da38ef3226e127ebea86ee\n", 403 | " Building wheel for ffmpy (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 404 | " Created wheel for ffmpy: filename=ffmpy-0.3.0-py3-none-any.whl size=4707 sha256=07a193b603fec97362baf967d1a3dcbfdb3ef1c0a7a9fa14f599a79428efafa3\n", 405 | " Stored in directory: /root/.cache/pip/wheels/91/e2/96/f676aa08bfd789328c6576cd0f1fde4a3d686703bb0c247697\n", 406 | "Successfully built fire peft transformers ffmpy\n", 407 | "Installing collected packages: tokenizers, sentencepiece, rfc3986, pydub, ffmpy, bitsandbytes, xxhash, websockets, uc-micro-py, tokenize-rt, semantic-version, python-multipart, pathspec, orjson, mypy-extensions, multidict, loralib, jedi, h11, frozenlist, fire, dill, async-timeout, aiofiles, yarl, uvicorn, starlette, responses, multiprocess, mdit-py-plugins, linkify-it-py, huggingface-hub, httpcore, black, aiosignal, transformers, httpx, gradio-client, fastapi, aiohttp, gradio, datasets, accelerate, peft\n", 408 | "Successfully installed accelerate-0.18.0 aiofiles-23.1.0 aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 bitsandbytes-0.37.2 black-23.3.0 datasets-2.11.0 dill-0.3.6 fastapi-0.95.0 ffmpy-0.3.0 fire-0.5.0 frozenlist-1.3.3 gradio-3.24.1 gradio-client-0.0.8 h11-0.14.0 httpcore-0.16.3 httpx-0.23.3 huggingface-hub-0.13.4 jedi-0.18.2 linkify-it-py-2.0.0 loralib-0.1.1 mdit-py-plugins-0.3.3 multidict-6.0.4 multiprocess-0.70.14 mypy-extensions-1.0.0 orjson-3.8.10 pathspec-0.11.1 peft-0.3.0.dev0 pydub-0.25.1 python-multipart-0.0.6 responses-0.18.0 rfc3986-1.5.0 semantic-version-2.10.0 sentencepiece-0.1.97 starlette-0.26.1 tokenize-rt-5.0.0 tokenizers-0.13.3 transformers-4.28.0.dev0 uc-micro-py-1.0.1 uvicorn-0.21.1 websockets-11.0.1 xxhash-3.2.0 yarl-1.8.2\n" 409 | ] 410 | } 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "source": [ 416 | "# Run the application" 417 | ], 418 | "metadata": { 419 | "id": "Cr3bQkSePfrG" 420 | } 421 | }, 422 | { 423 | "cell_type": "code", 424 | "source": [ 425 | "#@title Choose models\n", 426 | "\n", 427 | "base_model = 'decapoda-research/llama-7b-hf' #@param [\"decapoda-research/llama-7b-hf\", \"decapoda-research/llama-13b-hf\", \"decapoda-research/llama-30b-hf\"]\n", 428 | "finetuned_model = 'wangrongsheng/chatgentitle-lora-cs-6-new' #@param [\"wangrongsheng/chatgentitle-lora-cs-6-new\", \"wangrongsheng/chatgentitle-lora-all-3-new\", \"wangrongsheng/chatgentitle-lora-cs-3-new\"]\n" 429 | ], 430 | "metadata": { 431 | "id": "4Wg0eqnkPnq-" 432 | }, 433 | "execution_count": null, 434 | "outputs": [] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "source": [ 439 | "## Run the application\n", 440 | "\n", 441 | "It will take some time since LLaMA weights are huge. \n", 442 | "\n", 443 | "Click the URL appeared in the `Running on public URL:` field from the log. That will bring you to a new browser tab which opens up the running application." 444 | ], 445 | "metadata": { 446 | "id": "b81jhdtcQyOP" 447 | } 448 | }, 449 | { 450 | "cell_type": "code", 451 | "source": [ 452 | "!python generate.py --load_8bit --base_model $base_model --lora_weights $finetuned_model --share_gradio" 453 | ], 454 | "metadata": { 455 | "id": "y3qpzBw2jMHq" 456 | }, 457 | "execution_count": null, 458 | "outputs": [] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "source": [ 463 | "Have fun!" 
464 | ], 465 | "metadata": { 466 | "id": "mc5WIyxYGDo5" 467 | } 468 | } 469 | ] 470 | } -------------------------------------------------------------------------------- /finetune.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from typing import List 4 | 5 | import fire 6 | import torch 7 | import transformers 8 | from datasets import load_dataset 9 | 10 | """ 11 | Unused imports: 12 | import torch.nn as nn 13 | import bitsandbytes as bnb 14 | """ 15 | 16 | from peft import ( 17 | LoraConfig, 18 | get_peft_model, 19 | get_peft_model_state_dict, 20 | prepare_model_for_int8_training, 21 | set_peft_model_state_dict, 22 | ) 23 | from transformers import LlamaForCausalLM, LlamaTokenizer 24 | 25 | from utils.prompter import Prompter 26 | 27 | 28 | def train( 29 | # model/data params 30 | base_model: str = "", # the only required argument 31 | data_path: str = "yahma/alpaca-cleaned", 32 | output_dir: str = "./lora-alpaca", 33 | # training hyperparams 34 | batch_size: int = 128, 35 | micro_batch_size: int = 4, 36 | num_epochs: int = 3, 37 | learning_rate: float = 3e-4, 38 | cutoff_len: int = 256, 39 | val_set_size: int = 2000, 40 | # lora hyperparams 41 | lora_r: int = 8, 42 | lora_alpha: int = 16, 43 | lora_dropout: float = 0.05, 44 | lora_target_modules: List[str] = [ 45 | "q_proj", 46 | "v_proj", 47 | ], 48 | # llm hyperparams 49 | train_on_inputs: bool = True, # if False, masks out inputs in loss 50 | group_by_length: bool = False, # faster, but produces an odd training loss curve 51 | # wandb params 52 | wandb_project: str = "", 53 | wandb_run_name: str = "", 54 | wandb_watch: str = "", # options: false | gradients | all 55 | wandb_log_model: str = "", # options: false | true 56 | resume_from_checkpoint: str = None, # either training checkpoint or final adapter 57 | prompt_template_name: str = "alpaca", # The prompt template to use, will default to alpaca. 58 | ): 59 | if int(os.environ.get("LOCAL_RANK", 0)) == 0: 60 | print( 61 | f"Training Alpaca-LoRA model with params:\n" 62 | f"base_model: {base_model}\n" 63 | f"data_path: {data_path}\n" 64 | f"output_dir: {output_dir}\n" 65 | f"batch_size: {batch_size}\n" 66 | f"micro_batch_size: {micro_batch_size}\n" 67 | f"num_epochs: {num_epochs}\n" 68 | f"learning_rate: {learning_rate}\n" 69 | f"cutoff_len: {cutoff_len}\n" 70 | f"val_set_size: {val_set_size}\n" 71 | f"lora_r: {lora_r}\n" 72 | f"lora_alpha: {lora_alpha}\n" 73 | f"lora_dropout: {lora_dropout}\n" 74 | f"lora_target_modules: {lora_target_modules}\n" 75 | f"train_on_inputs: {train_on_inputs}\n" 76 | f"group_by_length: {group_by_length}\n" 77 | f"wandb_project: {wandb_project}\n" 78 | f"wandb_run_name: {wandb_run_name}\n" 79 | f"wandb_watch: {wandb_watch}\n" 80 | f"wandb_log_model: {wandb_log_model}\n" 81 | f"resume_from_checkpoint: {resume_from_checkpoint or False}\n" 82 | f"prompt template: {prompt_template_name}\n" 83 | ) 84 | assert ( 85 | base_model 86 | ), "Please specify a --base_model, e.g. 
--base_model='decapoda-research/llama-7b-hf'" 87 | gradient_accumulation_steps = batch_size // micro_batch_size 88 | 89 | prompter = Prompter(prompt_template_name) 90 | 91 | device_map = "auto" 92 | world_size = int(os.environ.get("WORLD_SIZE", 1)) 93 | ddp = world_size != 1 94 | if ddp: 95 | device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)} 96 | gradient_accumulation_steps = gradient_accumulation_steps // world_size 97 | 98 | # Check if parameter passed or if set within environ 99 | use_wandb = len(wandb_project) > 0 or ( 100 | "WANDB_PROJECT" in os.environ and len(os.environ["WANDB_PROJECT"]) > 0 101 | ) 102 | # Only overwrite environ if wandb param passed 103 | if len(wandb_project) > 0: 104 | os.environ["WANDB_PROJECT"] = wandb_project 105 | if len(wandb_watch) > 0: 106 | os.environ["WANDB_WATCH"] = wandb_watch 107 | if len(wandb_log_model) > 0: 108 | os.environ["WANDB_LOG_MODEL"] = wandb_log_model 109 | 110 | model = LlamaForCausalLM.from_pretrained( 111 | base_model, 112 | load_in_8bit=True, 113 | torch_dtype=torch.float16, 114 | device_map=device_map, 115 | ) 116 | 117 | tokenizer = LlamaTokenizer.from_pretrained(base_model) 118 | 119 | tokenizer.pad_token_id = ( 120 | 0 # unk. we want this to be different from the eos token 121 | ) 122 | tokenizer.padding_side = "left" # Allow batched inference 123 | 124 | def tokenize(prompt, add_eos_token=True): 125 | # there's probably a way to do this with the tokenizer settings 126 | # but again, gotta move fast 127 | result = tokenizer( 128 | prompt, 129 | truncation=True, 130 | max_length=cutoff_len, 131 | padding=False, 132 | return_tensors=None, 133 | ) 134 | if ( 135 | result["input_ids"][-1] != tokenizer.eos_token_id 136 | and len(result["input_ids"]) < cutoff_len 137 | and add_eos_token 138 | ): 139 | result["input_ids"].append(tokenizer.eos_token_id) 140 | result["attention_mask"].append(1) 141 | 142 | result["labels"] = result["input_ids"].copy() 143 | 144 | return result 145 | 146 | def generate_and_tokenize_prompt(data_point): 147 | full_prompt = prompter.generate_prompt( 148 | data_point["instruction"], 149 | data_point["input"], 150 | data_point["output"], 151 | ) 152 | tokenized_full_prompt = tokenize(full_prompt) 153 | if not train_on_inputs: 154 | user_prompt = prompter.generate_prompt( 155 | data_point["instruction"], data_point["input"] 156 | ) 157 | tokenized_user_prompt = tokenize(user_prompt, add_eos_token=False) 158 | user_prompt_len = len(tokenized_user_prompt["input_ids"]) 159 | 160 | tokenized_full_prompt["labels"] = [ 161 | -100 162 | ] * user_prompt_len + tokenized_full_prompt["labels"][ 163 | user_prompt_len: 164 | ] # could be sped up, probably 165 | return tokenized_full_prompt 166 | 167 | model = prepare_model_for_int8_training(model) 168 | 169 | config = LoraConfig( 170 | r=lora_r, 171 | lora_alpha=lora_alpha, 172 | target_modules=lora_target_modules, 173 | lora_dropout=lora_dropout, 174 | bias="none", 175 | task_type="CAUSAL_LM", 176 | ) 177 | model = get_peft_model(model, config) 178 | 179 | if data_path.endswith(".json") or data_path.endswith(".jsonl"): 180 | data = load_dataset("json", data_files=data_path) 181 | else: 182 | data = load_dataset(data_path) 183 | 184 | if resume_from_checkpoint: 185 | # Check the available weights and load them 186 | checkpoint_name = os.path.join( 187 | resume_from_checkpoint, "pytorch_model.bin" 188 | ) # Full checkpoint 189 | if not os.path.exists(checkpoint_name): 190 | checkpoint_name = os.path.join( 191 | resume_from_checkpoint, "adapter_model.bin" 192 | ) # only 
LoRA model - LoRA config above has to fit 193 | resume_from_checkpoint = ( 194 | False # So the trainer won't try loading its state 195 | ) 196 | # The two files above have a different name depending on how they were saved, but are actually the same. 197 | if os.path.exists(checkpoint_name): 198 | print(f"Restarting from {checkpoint_name}") 199 | adapters_weights = torch.load(checkpoint_name) 200 | model = set_peft_model_state_dict(model, adapters_weights) 201 | else: 202 | print(f"Checkpoint {checkpoint_name} not found") 203 | 204 | model.print_trainable_parameters() # Be more transparent about the % of trainable params. 205 | 206 | if val_set_size > 0: 207 | train_val = data["train"].train_test_split( 208 | test_size=val_set_size, shuffle=True, seed=42 209 | ) 210 | train_data = ( 211 | train_val["train"].shuffle().map(generate_and_tokenize_prompt) 212 | ) 213 | val_data = ( 214 | train_val["test"].shuffle().map(generate_and_tokenize_prompt) 215 | ) 216 | else: 217 | train_data = data["train"].shuffle().map(generate_and_tokenize_prompt) 218 | val_data = None 219 | 220 | if not ddp and torch.cuda.device_count() > 1: 221 | # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available 222 | model.is_parallelizable = True 223 | model.model_parallel = True 224 | 225 | trainer = transformers.Trainer( 226 | model=model, 227 | train_dataset=train_data, 228 | eval_dataset=val_data, 229 | args=transformers.TrainingArguments( 230 | per_device_train_batch_size=micro_batch_size, 231 | gradient_accumulation_steps=gradient_accumulation_steps, 232 | warmup_steps=100, 233 | num_train_epochs=num_epochs, 234 | learning_rate=learning_rate, 235 | fp16=True, 236 | logging_steps=10, 237 | optim="adamw_torch", 238 | evaluation_strategy="steps" if val_set_size > 0 else "no", 239 | save_strategy="steps", 240 | eval_steps=200 if val_set_size > 0 else None, 241 | save_steps=200, 242 | output_dir=output_dir, 243 | save_total_limit=3, 244 | load_best_model_at_end=True if val_set_size > 0 else False, 245 | ddp_find_unused_parameters=False if ddp else None, 246 | group_by_length=group_by_length, 247 | report_to="wandb" if use_wandb else None, 248 | run_name=wandb_run_name if use_wandb else None, 249 | ), 250 | data_collator=transformers.DataCollatorForSeq2Seq( 251 | tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True 252 | ), 253 | ) 254 | model.config.use_cache = False 255 | 256 | old_state_dict = model.state_dict 257 | model.state_dict = ( 258 | lambda self, *_, **__: get_peft_model_state_dict( 259 | self, old_state_dict() 260 | ) 261 | ).__get__(model, type(model)) 262 | 263 | if torch.__version__ >= "2" and sys.platform != "win32": 264 | model = torch.compile(model) 265 | 266 | trainer.train(resume_from_checkpoint=resume_from_checkpoint) 267 | 268 | model.save_pretrained(output_dir) 269 | 270 | print( 271 | "\n If there's a warning about missing keys above, please disregard :)" 272 | ) 273 | 274 | 275 | if __name__ == "__main__": 276 | fire.Fire(train) 277 | -------------------------------------------------------------------------------- /generate.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import fire 4 | import gradio as gr 5 | import torch 6 | import transformers 7 | from peft import PeftModel 8 | from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer 9 | 10 | from utils.prompter import Prompter 11 | 12 | if torch.cuda.is_available(): 13 | device = "cuda" 14 | else: 15 | device = "cpu" 16 | 
17 | try: 18 | if torch.backends.mps.is_available(): 19 | device = "mps" 20 | except: # noqa: E722 21 | pass 22 | 23 | 24 | def main( 25 | load_8bit: bool = False, 26 | base_model: str = "", 27 | lora_weights: str = "tloen/alpaca-lora-7b", 28 | prompt_template: str = "", # The prompt template to use, will default to alpaca. 29 | server_name: str = "127.0.0.1", # Allows to listen on all interfaces by providing '0.0.0.0' 30 | share_gradio: bool = False, 31 | ): 32 | assert ( 33 | base_model 34 | ), "Please specify a --base_model, e.g. --base_model='decapoda-research/llama-7b-hf'" 35 | 36 | prompter = Prompter(prompt_template) 37 | tokenizer = LlamaTokenizer.from_pretrained(base_model) 38 | if device == "cuda": 39 | model = LlamaForCausalLM.from_pretrained( 40 | base_model, 41 | load_in_8bit=load_8bit, 42 | torch_dtype=torch.float16, 43 | device_map="auto", 44 | ) 45 | model = PeftModel.from_pretrained( 46 | model, 47 | lora_weights, 48 | torch_dtype=torch.float16, 49 | # device_map={"": 0}, # uncomment this if you hit a device placement error 50 | ) 51 | elif device == "mps": 52 | model = LlamaForCausalLM.from_pretrained( 53 | base_model, 54 | device_map={"": device}, 55 | torch_dtype=torch.float16, 56 | ) 57 | model = PeftModel.from_pretrained( 58 | model, 59 | lora_weights, 60 | device_map={"": device}, 61 | torch_dtype=torch.float16, 62 | ) 63 | else: 64 | model = LlamaForCausalLM.from_pretrained( 65 | base_model, device_map={"": device}, low_cpu_mem_usage=True 66 | ) 67 | model = PeftModel.from_pretrained( 68 | model, 69 | lora_weights, 70 | device_map={"": device}, 71 | ) 72 | 73 | # unwind broken decapoda-research config 74 | model.config.pad_token_id = tokenizer.pad_token_id = 0 # unk 75 | model.config.bos_token_id = 1 76 | model.config.eos_token_id = 2 77 | 78 | if not load_8bit: 79 | model.half() # seems to fix bugs for some users. 80 | 81 | model.eval() 82 | if torch.__version__ >= "2" and sys.platform != "win32": 83 | model = torch.compile(model) 84 | 85 | def evaluate( 86 | instruction, 87 | input=None, 88 | temperature=0.1, 89 | top_p=0.75, 90 | top_k=40, 91 | num_beams=4, 92 | max_new_tokens=128, 93 | **kwargs, 94 | ): 95 | prompt = prompter.generate_prompt(instruction, input) 96 | inputs = tokenizer(prompt, return_tensors="pt") 97 | input_ids = inputs["input_ids"].to(device) 98 | generation_config = GenerationConfig( 99 | temperature=temperature, 100 | top_p=top_p, 101 | top_k=top_k, 102 | num_beams=num_beams, 103 | **kwargs, 104 | ) 105 | with torch.no_grad(): 106 | generation_output = model.generate( 107 | input_ids=input_ids, 108 | generation_config=generation_config, 109 | return_dict_in_generate=True, 110 | output_scores=True, 111 | max_new_tokens=max_new_tokens, 112 | ) 113 | s = generation_output.sequences[0] 114 | output = tokenizer.decode(s) 115 | return prompter.get_response(output) 116 | 117 | gr.Interface( 118 | fn=evaluate, 119 | inputs=[ 120 | gr.components.Textbox( 121 | lines=2, 122 | label="Instruction", 123 | placeholder="Tell me about alpacas.", 124 | ), 125 | gr.components.Textbox(lines=2, label="Input", placeholder="none"), 126 | gr.components.Slider( 127 | minimum=0, maximum=1, value=0.1, label="Temperature" 128 | ), 129 | gr.components.Slider( 130 | minimum=0, maximum=1, value=0.75, label="Top p" 131 | ), 132 | gr.components.Slider( 133 | minimum=0, maximum=100, step=1, value=40, label="Top k" 134 | ), 135 | gr.components.Slider( 136 | minimum=1, maximum=4, step=1, value=4, label="Beams" 137 | ), 138 | gr.components.Slider( 139 | minimum=1, maximum=2000, step=1, value=128, label="Max tokens" 140 | ), 141 | ], 142 | outputs=[ 143 | gr.components.Textbox( 144 | lines=5, 145 | label="Output", 146 | ) 147 | ], 148 | title="🌲 ChatGenTitle", 149 | description="please visit [the project's website](https://github.com/WangRongsheng/ChatGenTitle).", # noqa: E501 150 | ).launch(server_name=server_name, share=share_gradio) 151 | # Old testing code follows. 152 | 153 | """ 154 | # testing code for readme 155 | for instruction in [ 156 | "Tell me about alpacas.", 157 | "Tell me about the president of Mexico in 2019.", 158 | "Tell me about the king of France in 2019.", 159 | "List all Canadian provinces in alphabetical order.", 160 | "Write a Python program that prints the first 10 Fibonacci numbers.", 161 | "Write a program that prints the numbers from 1 to 100. But for multiples of three print 'Fizz' instead of the number and for the multiples of five print 'Buzz'. 
162 | "Tell me five words that rhyme with 'shock'.", 163 | "Translate the sentence 'I have no mouth but I must scream' into Spanish.", 164 | "Count up from 1 to 500.", 165 | ]: 166 | print("Instruction:", instruction) 167 | print("Response:", evaluate(instruction)) 168 | print() 169 | """ 170 | 171 | 172 | if __name__ == "__main__": 173 | fire.Fire(main) 174 |
-------------------------------------------------------------------------------- /get_arxiv_multiprocessing.py: --------------------------------------------------------------------------------
1 | import urllib.request 2 | import feedparser 3 | from datetime import datetime 4 | import json 5 | import multiprocessing 6 | from multiprocessing import Pool 7 | import time 8 | import re 9 | 10 | def get_article_info(url): 11 | response = urllib.request.urlopen(url) 12 | rss = response.read() 13 | feed = feedparser.parse(rss) 14 | data = [] 15 | # Iterate over the articles, extracting the title and abstract of each 16 | for entry in feed.entries: 17 | summary = entry.summary.replace('\n', '').replace('<p>', '').replace('</p>', '').replace('\\', '') 18 | summary = re.sub(r'http\S+', '', summary) 19 | title = entry.title.replace('(arXiv:' + entry.title.split('(arXiv:')[1].split(')')[0] + ')', '').strip() 20 | info = { 21 | "instruction": "If you are an expert in writing papers, please generate a good paper title for this paper based on other authors' descriptions of their abstracts.", 22 | "input": str(summary), 23 | "output": str(title) 24 | } 25 | data.append(info) 26 | return data 27 | 28 | if __name__ == '__main__': 29 | # Fetch the daily arXiv updates for the AI, computer vision, and machine learning categories 30 | url1 = "http://export.arxiv.org/rss/cs.AI" # RSS feed for the artificial-intelligence category 31 | url2 = "http://export.arxiv.org/rss/cs.CV" # RSS feed for the computer-vision category 32 | url3 = "http://export.arxiv.org/rss/cs.LG" # RSS feed for the machine-learning category 33 | urls = [url1, url2, url3] 34 | 35 | # Collected records 36 | data = [] 37 | 38 | # Start the timer 39 | start = time.time() 40 | 41 | # Get the number of CPU cores 42 | cores = multiprocessing.cpu_count() 43 | # Use one process more than the number of cores 44 | processes = cores + 1 45 | 46 | # Fetch the article info in parallel with a process pool 47 | with Pool(processes) as p: 48 | data = p.map(get_article_info, urls) 49 | # Flatten the per-feed lists into a single list 50 | data = [info for subdata in data for info in subdata] 51 | 52 | # Get today's date 53 | today = datetime.today().date() 54 | 55 | # Save as JSON 56 | with open('data/'+str(today)+'.json', 'w+') as f: 57 | json.dump(data, f) 58 | 59 | # Stop the timer and report the elapsed time 60 | end = time.time() 61 | print('Processed %s articles in %.2f seconds' % (len(data), end-start)) 62 | 63 | # e.g. Processed 371 articles in 4.51 seconds 64 |
-------------------------------------------------------------------------------- /get_daily_llm_paper.py: --------------------------------------------------------------------------------
1 | import requests 2 | import xml.etree.ElementTree as ET 3 | 4 | base_url = "http://export.arxiv.org/api/query?" 5 | search_query = "large+language+models" 6 | start = 0 7 | max_results = 30 8 | 9 | query = f"search_query=all:{search_query}&start={start}&max_results={max_results}" 10 | 11 | url = base_url + query 12 | 13 | response = requests.get(url) 14 | 15 | if response.status_code == 200: 16 | print("Fetched papers successfully!") 17 | 18 | # Parse the XML response 19 | root = ET.fromstring(response.text) 20 | 21 | # Open the Markdown file that stores the results 22 | with open("LLMs-papers.md", "w+", encoding="utf-8") as md_file: 23 | print("Output file opened") 24 | # Extract the link and title of each paper 25 | c = 1 26 | for entry in root.findall('{http://www.w3.org/2005/Atom}entry'): 27 | link = entry.find('{http://www.w3.org/2005/Atom}id').text 28 | title = entry.find('{http://www.w3.org/2005/Atom}title').text 29 | 30 | # Write the title and link to the Markdown file 31 | md_file.write(str(c) + f". [{title}]({link})\n") 32 | c += 1 33 | 34 | print("Paper info saved to LLMs-papers.md!") 35 | 36 | else: 37 | print("Failed to fetch papers") 38 |
-------------------------------------------------------------------------------- /requirements-all.txt: --------------------------------------------------------------------------------
1 | absl-py 1.4.0 2 | accelerate 0.18.0 3 | aiofiles 23.1.0 4 | aiohttp 3.8.4 5 | aiosignal 1.3.1 6 | altair 4.2.2 7 | anyio 3.6.2 8 | appdirs 1.4.4 9 | asttokens 2.2.1 10 | async-timeout 4.0.2 11 | attrs 22.2.0 12 | backcall 0.2.0 13 | bitsandbytes 0.37.2 14 | black 23.3.0 15 | brotlipy 0.7.0 16 | cachetools 5.3.0 17 | certifi 2022.12.7 18 | cffi 1.15.1 19 | charset-normalizer 2.0.4 20 | click 8.1.3 21 | cmake 3.26.1 22 | conda 23.1.0 23 | conda-content-trust 0.1.3 24 | conda-package-handling 2.0.2 25 | conda_package_streaming 0.7.0 26 | contourpy 1.0.7 27 | cryptography 38.0.4 28 | cycler 0.11.0 29 | datasets 2.10.1 30 | decorator 5.1.1 31 | dill 0.3.6 32 | docker-pycreds 0.4.0 33 | entrypoints 0.4 34 | executing 1.2.0 35 | fairscale 0.4.13 36 | fastapi 0.95.0 37 | ffmpy 0.3.0 38 | filelock 3.10.7 39 | fire 0.5.0 40 | fonttools 4.39.3 41 | frozenlist 1.3.3 42 | fsspec 2023.3.0 43 | gitdb 4.0.10 44 | GitPython 3.1.31 45 | gradio 3.23.0 46 | h11 0.14.0 47 | hiq-python 1.1.10 48 | httpcore 0.16.3 49 | httpx 0.23.3 50 | huggingface-hub 0.13.3 51 | idna 3.4 52 | ipython 8.11.0 53 | jedi 0.18.2 54 | Jinja2 3.1.2 55 | joblib 1.2.0 56 | jsonschema 4.17.3 57 | kiwisolver 1.4.4 58 | linkify-it-py 2.0.0 59 | lit 16.0.0 60 | markdown-it-py 2.2.0 61 | MarkupSafe 2.1.2 62 | matplotlib 3.7.1 63 | matplotlib-inline 0.1.6 64 | mdit-py-plugins 0.3.3 65 | mdurl 0.1.2 66 | mpmath 1.3.0 67 | multidict 6.0.4 68 | multiprocess 0.70.14 69 | mypy-extensions 1.0.0 70 | networkx 3.0 71 | nltk 3.8.1 72 | numpy 1.24.2 73 | nvidia-cublas-cu11 11.10.3.66 74 | nvidia-cuda-cupti-cu11 11.7.101 75 | nvidia-cuda-nvrtc-cu11 11.7.99 76 | nvidia-cuda-runtime-cu11 11.7.99 77 | nvidia-cudnn-cu11 8.5.0.96 78 | nvidia-cufft-cu11 10.9.0.58 79 | nvidia-curand-cu11 10.2.10.91 80 | nvidia-cusolver-cu11 11.4.0.1 81 | nvidia-cusparse-cu11 11.7.4.91 82 | nvidia-nccl-cu11 2.14.3 83 | nvidia-nvtx-cu11 11.7.91 84 | openai 0.27.2 85 | orjson 3.8.9 86 | packaging 23.0 87 | pandas 1.5.3 88 | parso 0.8.3 89 | pathspec 0.11.1 90 | pathtools 0.1.2 91 | peft 0.3.0.dev0 92 | pexpect 4.8.0 93 | pickleshare 0.7.5 94 | Pillow 9.4.0 95 | pip 22.3.1 96 | platformdirs 3.2.0 97 | pluggy 1.0.0 98 | prompt-toolkit 3.0.38 99 | protobuf 4.22.1 100 | psutil 5.9.4 101 | ptyprocess 0.7.0 102 | pure-eval 0.2.2 103 | py-itree 0.0.18 104 | pyarrow 11.0.0 105 | pycosat 0.6.4 106 | pycparser 2.21 107 | pydantic 1.10.7 108 | pydub 0.25.1 109 | Pygments 2.14.0 110 | pyllama 0.0.9 111 | pyOpenSSL 22.0.0 112 | pyparsing 3.0.9 113 | pyrsistent 0.19.3 114 | PySocks 1.7.1 115 | python-dateutil 2.8.2 116 | python-multipart 0.0.6 117 | pytz 2023.3 118 | PyYAML 6.0 119 | regex 2023.3.23 120 | requests 2.28.1 121 | responses 0.18.0 122 | rfc3986 1.5.0 123 | rouge-score 0.1.2 124 | ruamel.yaml 0.17.21 125 | ruamel.yaml.clib 0.2.6 126 | semantic-version 2.10.0 127 | sentencepiece 0.1.97 128 | sentry-sdk 1.18.0 129 | setproctitle 1.3.2 130 | setuptools 65.6.3 131 | six 1.16.0 132 | smmap 5.0.0 133 | sniffio 1.3.0 134 | stack-data 0.6.2 135 | starlette 0.26.1 136 | sympy 1.11.1 137 | termcolor 2.2.0 138 | tokenize-rt 5.0.0 139 | tokenizers 0.12.1 140 | tomli 2.0.1 141 | toolz 0.12.0 142 | torch 1.13.1+cu117 143 | torchaudio 0.13.1+cu117 144 | torchvision 0.14.1+cu117 145 | tqdm 4.64.1
146 | traitlets 5.9.0 147 | transformers 4.28.0.dev0 148 | triton 2.0.0 149 | typing_extensions 4.5.0 150 | uc-micro-py 1.0.1 151 | urllib3 1.26.14 152 | uvicorn 0.21.1 153 | wandb 0.14.0 154 | wcwidth 0.2.6 155 | websockets 10.4 156 | wheel 0.37.1 157 | xxhash 3.2.0 158 | yarl 1.8.2 159 | zstandard 0.18.0 -------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | accelerate 2 | appdirs 3 | bitsandbytes 4 | black 5 | black[jupyter] 6 | datasets 7 | fire 8 | git+https://github.com/huggingface/peft.git 9 | git+https://github.com/huggingface/transformers.git 10 | gradio 11 | sentencepiece
-------------------------------------------------------------------------------- /templates/README.md: --------------------------------------------------------------------------------
1 | # Prompt templates 2 | 3 | This directory contains template styles for the prompts used to finetune LoRA models. 4 | 5 | ## Format 6 | 7 | A template is described via a JSON file with the following keys: 8 | 9 | - `prompt_input`: The template to use when input is not None. Uses `{instruction}` and `{input}` placeholders. 10 | - `prompt_no_input`: The template to use when input is None. Uses the `{instruction}` placeholder. 11 | - `description`: A short description of the template, with possible use cases. 12 | - `response_split`: The text to use as a separator when cutting the real response out of the model output. 13 | 14 | There is no `{response}` placeholder, since the response is always the last element of the template and is simply concatenated to the rest. 15 | 16 | ## Example template 17 | 18 | The default template, used unless otherwise specified, is `alpaca.json`. 19 | 20 | ```json 21 | { 22 | "description": "Template used by Alpaca-LoRA.", 23 | "prompt_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n", 24 | "prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n", 25 | "response_split": "### Response:" 26 | } 27 | 28 | ``` 29 | 30 | ## Current templates 31 | 32 | ### alpaca 33 | 34 | Default template used for generic LoRA fine-tunes so far. 35 | 36 | ### alpaca_legacy 37 | 38 | Legacy template used by the original alpaca repo, with no `\n` after the response field. Kept for reference and experiments. 39 | 40 | ### alpaca_short 41 | 42 | A trimmed-down alpaca template which seems to perform just as well and save some tokens. Models created with the default template seem to be queryable by the short template as well. More experiments are welcome. 43 | 44 | ### vigogne 45 | 46 | The default alpaca template, translated to French. This template was used to train the "Vigogne" LoRA and should be used to query it, or for further fine-tuning. 47 |
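48 | ## Using a template 49 | 50 | A minimal sketch of how the three fields fit together, mirroring what `utils/prompter.py` does (the `model_output` string below is a made-up stand-in for real generation output): 51 | 52 | ```python 53 | import json 54 | 55 | # Load the default template and fill in its placeholders. 56 | with open("templates/alpaca.json") as fp: 57 | template = json.load(fp) 58 | 59 | prompt = template["prompt_input"].format( 60 | instruction="Generate a paper title for this abstract.", 61 | input="We present a method for ...", 62 | ) 63 | 64 | # The decoded model output echoes the prompt, so split on 65 | # `response_split` to keep only the generated response. 66 | model_output = prompt + "A Method For Doing X Efficiently" 67 | response = model_output.split(template["response_split"])[1].strip() 68 | ```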
-------------------------------------------------------------------------------- /templates/alpaca.json: --------------------------------------------------------------------------------
1 | { 2 | "description": "Template used by Alpaca-LoRA.", 3 | "prompt_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n", 4 | "prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n", 5 | "response_split": "### Response:" 6 | } 7 |
-------------------------------------------------------------------------------- /templates/alpaca_legacy.json: --------------------------------------------------------------------------------
1 | { 2 | "description": "Legacy template, used by Original Alpaca repository.", 3 | "prompt_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:", 4 | "prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:", 5 | "response_split": "### Response:" 6 | } 7 |
-------------------------------------------------------------------------------- /templates/alpaca_short.json: --------------------------------------------------------------------------------
1 | { 2 | "description": "A shorter template to experiment with.", 3 | "prompt_input": "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n", 4 | "prompt_no_input": "### Instruction:\n{instruction}\n\n### Response:\n", 5 | "response_split": "### Response:" 6 | } 7 |
-------------------------------------------------------------------------------- /templates/vigogne.json: --------------------------------------------------------------------------------
1 | { 2 | "description": "French template, used by Vigogne for finetuning.", 3 | "prompt_input": "Ci-dessous se trouve une instruction qui décrit une tâche, associée à une entrée qui fournit un contexte supplémentaire. Écrivez une réponse qui complète correctement la demande.\n\n### Instruction:\n{instruction}\n\n### Entrée:\n{input}\n\n### Réponse:\n", 4 | "prompt_no_input": "Ci-dessous se trouve une instruction qui décrit une tâche. Écrivez une réponse qui complète correctement la demande.\n\n### Instruction:\n{instruction}\n\n### Réponse:\n", 5 | "response_split": "### Réponse:" 6 | } 7 |
-------------------------------------------------------------------------------- /utils/README.md: --------------------------------------------------------------------------------
1 | # Directory for helper modules 2 | 3 | ## prompter.py 4 | 5 | The `Prompter` class, a template manager. 6 | 7 | `from utils.prompter import Prompter`
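8 | 9 | A minimal usage sketch (the instruction and input strings below are made-up examples): 10 | 11 | ```python 12 | from utils.prompter import Prompter 13 | 14 | prompter = Prompter() # defaults to templates/alpaca.json 15 | prompt = prompter.generate_prompt( 16 | instruction="Generate a paper title for this abstract.", 17 | input="We present a method for ...", 18 | ) 19 | # After generation, prompter.get_response(model_output) cuts the 20 | # decoded output down to just the text after "### Response:". 21 | ```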
3 | """ 4 | 5 | import json 6 | import os.path as osp 7 | from typing import Union 8 | 9 | 10 | class Prompter(object): 11 | __slots__ = ("template", "_verbose") 12 | 13 | def __init__(self, template_name: str = "", verbose: bool = False): 14 | self._verbose = verbose 15 | if not template_name: 16 | # Enforce the default here, so the constructor can be called with '' and will not break. 17 | template_name = "alpaca" 18 | file_name = osp.join("templates", f"{template_name}.json") 19 | if not osp.exists(file_name): 20 | raise ValueError(f"Can't read {file_name}") 21 | with open(file_name) as fp: 22 | self.template = json.load(fp) 23 | if self._verbose: 24 | print( 25 | f"Using prompt template {template_name}: {self.template['description']}" 26 | ) 27 | 28 | def generate_prompt( 29 | self, 30 | instruction: str, 31 | input: Union[None, str] = None, 32 | label: Union[None, str] = None, 33 | ) -> str: 34 | # returns the full prompt from instruction and optional input 35 | # if a label (=response, =output) is provided, it's also appended. 36 | if input: 37 | res = self.template["prompt_input"].format( 38 | instruction=instruction, input=input 39 | ) 40 | else: 41 | res = self.template["prompt_no_input"].format( 42 | instruction=instruction 43 | ) 44 | if label: 45 | res = f"{res}{label}" 46 | if self._verbose: 47 | print(res) 48 | return res 49 | 50 | def get_response(self, output: str) -> str: 51 | return output.split(self.template["response_split"])[1].strip() 52 | --------------------------------------------------------------------------------