├── .gitignore ├── LICENSE ├── README.md ├── Step0_for_linux_install_pwsh.bash ├── Step1_install-uv.ps1 ├── Step2_embedding.ps1 ├── dash_page.py ├── datasets.py ├── datasets └── put images here ├── lancedatasets.py ├── main.py ├── model.py ├── pipeline.py ├── process_image.py ├── requirements-uv.txt ├── requirements.txt ├── transfromer2lance.ps1 ├── uv-installer.ps1 └── uv-installer.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | datasets/ 165 | huggingface/ 166 | .lance* 167 | .lance 168 | .lance/ 169 | output/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2024 青龍聖者@bdsqlsz 5 | 6 | Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software. 11 | 12 | The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. 13 | 14 | When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. 15 | 16 | Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software. 17 | 18 | A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public. 
19 | 20 | The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version. 21 | 22 | An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license. 23 | 24 | The precise terms and conditions for copying, distribution and modification follow. 25 | 26 | TERMS AND CONDITIONS 27 | 0. Definitions. 28 | "This License" refers to version 3 of the GNU Affero General Public License. 29 | 30 | "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. 31 | 32 | "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. 33 | 34 | To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. 35 | 36 | A "covered work" means either the unmodified Program or a work based on the Program. 37 | 38 | To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. 39 | 40 | To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. 41 | 42 | An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 43 | 44 | 1. Source Code. 45 | The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. 46 | 47 | A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. 
48 | 49 | The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. 50 | 51 | The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. 52 | 53 | The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. 54 | 55 | The Corresponding Source for a work in source code form is that same work. 56 | 57 | 2. Basic Permissions. 58 | All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. 59 | 60 | You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 61 | 62 | Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 63 | 64 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 65 | No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. 
66 | 67 | When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 68 | 69 | 4. Conveying Verbatim Copies. 70 | You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. 71 | 72 | You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 73 | 74 | 5. Conveying Modified Source Versions. 75 | You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: 76 | 77 | a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 78 | b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". 79 | c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. 80 | d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. 81 | A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 82 | 83 | 6. Conveying Non-Source Forms. 84 | You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: 85 | 86 | a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
87 | b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. 88 | c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. 89 | d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. 90 | e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 91 | A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. 92 | 93 | A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. 94 | 95 | "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. 
96 | 97 | If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). 98 | 99 | The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. 100 | 101 | Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 102 | 103 | 7. Additional Terms. 104 | "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. 105 | 106 | When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
107 | 108 | Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: 109 | 110 | a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or 111 | b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or 112 | c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or 113 | d) Limiting the use for publicity purposes of names of licensors or authors of the material; or 114 | e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or 115 | f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. 116 | All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 117 | 118 | If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. 119 | 120 | Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 121 | 122 | 8. Termination. 123 | You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). 124 | 125 | However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. 126 | 127 | Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. 128 | 129 | Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. 
If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 130 | 131 | 9. Acceptance Not Required for Having Copies. 132 | You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 133 | 134 | 10. Automatic Licensing of Downstream Recipients. 135 | Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. 136 | 137 | An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. 138 | 139 | You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 140 | 141 | 11. Patents. 142 | A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". 143 | 144 | A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. 145 | 146 | Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. 147 | 148 | In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). 
To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 149 | 150 | If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. 151 | 152 | If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. 153 | 154 | A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. 155 | 156 | Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 157 | 158 | 12. No Surrender of Others' Freedom. 159 | If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 160 | 161 | 13. Remote Network Interaction; Use with the GNU General Public License. 
162 | Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph. 163 | 164 | Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License. 165 | 166 | 14. Revised Versions of this License. 167 | The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. 168 | 169 | Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. 170 | 171 | If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. 172 | 173 | Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 174 | 175 | 15. Disclaimer of Warranty. 176 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 177 | 178 | 16. Limitation of Liability. 
179 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 180 | 181 | 17. Interpretation of Sections 15 and 16. 182 | If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. 183 | 184 | END OF TERMS AND CONDITIONS 185 | 186 | How to Apply These Terms to Your New Programs 187 | If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. 188 | 189 | To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 190 | 191 | 192 | Copyright (C) <year> <name of author> 193 | 194 | This program is free software: you can redistribute it and/or modify 195 | it under the terms of the GNU Affero General Public License as 196 | published by the Free Software Foundation, either version 3 of the 197 | License, or (at your option) any later version. 198 | 199 | This program is distributed in the hope that it will be useful, 200 | but WITHOUT ANY WARRANTY; without even the implied warranty of 201 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 202 | GNU Affero General Public License for more details. 203 | 204 | You should have received a copy of the GNU Affero General Public License 205 | along with this program. If not, see <https://www.gnu.org/licenses/>. 206 | Also add information on how to contact you by electronic and paper mail. 207 | 208 | If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code. There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements. 209 | 210 | You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see <https://www.gnu.org/licenses/>.
211 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CSD_image2embedding 2 | 3 | Code reference: https://huggingface.co/yuxi-liu-wired/CSD 4 | 5 | ## Installation 6 | 7 | ### Windows: 8 | 9 | Clone this repo 10 | 11 | `git clone https://github.com/sdbds/CSD_image2embedding` or download it from the releases page 12 | 13 | Run `Step1_install-uv.ps1` with PowerShell (right-click it and choose "Run with PowerShell") 14 | 15 | It then installs everything automatically (including Python) 16 | 17 | ### Linux: 18 | First run `bash Step0_for_linux_install_pwsh.bash` 19 | 20 | Then `pwsh Step1_install-uv.ps1` or `sudo pwsh Step1_install-uv.ps1` 21 | 22 | ## Usage 23 | 24 | 1. Put any image dataset into the `datasets` folder 25 | 26 | 2. Run `Step2_embedding.ps1` with PowerShell (right-click it and choose "Run with PowerShell") 27 | On Linux, use `pwsh Step2_embedding.ps1` or `sudo pwsh Step2_embedding.ps1` 28 | 29 | 3. Open the address shown in the terminal (it should open automatically) 30 | 31 | 4. Results are saved in the `output` folder 32 | -------------------------------------------------------------------------------- /Step0_for_linux_install_pwsh.bash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | #export PIP_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple" 3 | #export HF_ENDPOINT="https://hf-mirror.com" 4 | 5 | echo "Checking whether PowerShell is already installed..." 6 | if ! command -v pwsh &> /dev/null 7 | then 8 | echo "PowerShell is not installed, installing..." 9 | 10 | # Download the PowerShell '.tar.gz' archive 11 | curl -L -o /tmp/powershell.tar.gz https://github.com/PowerShell/PowerShell/releases/download/v7.4.5/powershell-7.4.5-linux-x64.tar.gz 12 | 13 | # Create the target directory 14 | sudo mkdir -p /opt/microsoft/powershell/7 15 | 16 | # Extract PowerShell into the target directory 17 | sudo tar zxf /tmp/powershell.tar.gz -C /opt/microsoft/powershell/7 18 | 19 | # Set execute permission 20 | sudo chmod +x /opt/microsoft/powershell/7/pwsh 21 | 22 | # Create a symbolic link to pwsh 23 | sudo ln -s /opt/microsoft/powershell/7/pwsh /usr/bin/pwsh 24 | 25 | echo "PowerShell installation complete" 26 | else 27 | echo "PowerShell is already installed" 28 | fi 29 | 30 | echo "Install completed" 31 | -------------------------------------------------------------------------------- /Step1_install-uv.ps1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdbds/CSD_image2embedding/91b7781c515cdf165b3c76259b8a55599d26f7fd/Step1_install-uv.ps1 -------------------------------------------------------------------------------- /Step2_embedding.ps1: -------------------------------------------------------------------------------- 1 | $batch_size = 12 2 | $k_clusters = 40 # number of clusters for k-means clustering 3 | $min_cluster_size = 10 # minimum cluster size for HDBSCAN auto-clustering 4 | $symlink = 1 # 0 or 1 (False or True): whether to use symlinks 5 | 6 | # ============= DO NOT MODIFY CONTENTS BELOW | 请勿修改下方内容 ===================== 7 | # Activate python venv 8 | Set-Location $PSScriptRoot 9 | if ($env:OS -ilike "*windows*") { 10 | if (Test-Path "./venv/Scripts/activate") { 11 | Write-Output "Windows venv" 12 | ./venv/Scripts/activate 13 | } 14 | elseif (Test-Path "./.venv/Scripts/activate") { 15 | Write-Output "Windows .venv" 16 | ./.venv/Scripts/activate 17 | } 18 | } 19 | elseif (Test-Path "./venv/bin/activate") { 20 | Write-Output "Linux venv" 21 | ./venv/bin/Activate.ps1 22 | } 23 | elseif (Test-Path "./.venv/bin/activate") { 24 | Write-Output "Linux .venv" 25 | ./.venv/bin/activate.ps1 26 | } 27 | 28 | $Env:HF_HOME = "huggingface" 29 | $Env:XFORMERS_FORCE_DISABLE_TRITON = "1" 30 |
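# Note (added commentary): HF_HOME keeps the Hugging Face model cache in the local
# "huggingface" folder, and XFORMERS_FORCE_DISABLE_TRITON sidesteps Triton issues on
# platforms where it is unavailable; uncomment the HF_ENDPOINT line below to download
# models through a mirror. The block that follows forwards only non-default settings
# to main.py, so an equivalent direct call (illustrative values) would be:
#   python main.py --batch_size=8 --k_clusters=20 --min_cluster_size=10 --symlink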
#$Env:HF_ENDPOINT = "https://hf-mirror.com" 31 | $ext_args = [System.Collections.ArrayList]::new() 32 | 33 | if ($batch_size -ne 12) { 34 | [void]$ext_args.Add("--batch_size=$batch_size") 35 | } 36 | 37 | if ($k_clusters -ne 40) { 38 | [void]$ext_args.Add("--k_clusters=$k_clusters") 39 | } 40 | 41 | if ($min_cluster_size -ne 10) { 42 | [void]$ext_args.Add("--min_cluster_size=$min_cluster_size") 43 | } 44 | 45 | if ($symlink) { 46 | [void]$ext_args.Add("--symlink") 47 | } 48 | 49 | # run embedding 50 | python main.py $ext_args 51 | 52 | Write-Output "Embedding finished" 53 | Read-Host | Out-Null ; -------------------------------------------------------------------------------- /dash_page.py: -------------------------------------------------------------------------------- 1 | from sklearn.cluster import KMeans 2 | from hdbscan import HDBSCAN 3 | import numpy as np 4 | import random 5 | import socket 6 | from dash import dcc, html, Input, Output, no_update, Dash, callback_context 7 | import plotly.graph_objects as go 8 | from PIL import Image 9 | import base64 10 | import io 11 | import os 12 | from scipy.spatial.distance import cdist 13 | from process_image import classify_images 14 | import webbrowser 15 | 16 | 17 | def find_free_port(): 18 | while True: 19 | port = random.randint(49152, 65535) # Use dynamic/private port range 20 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: 21 | try: 22 | s.bind(("", port)) 23 | return port 24 | except OSError: 25 | pass 26 | 27 | 28 | def create_dash_app(fig, images): 29 | app = Dash(__name__) 30 | 31 | app.layout = html.Div( 32 | className="container", 33 | children=[ 34 | dcc.Graph(id="graph", figure=fig, clear_on_unhover=True), 35 | dcc.Tooltip(id="graph-tooltip", direction="bottom"), 36 | ], 37 | ) 38 | 39 | @app.callback( 40 | Output("graph-tooltip", "show"), 41 | Output("graph-tooltip", "bbox"), 42 | Output("graph-tooltip", "children"), 43 | Input("graph", "hoverData"), 44 | ) 45 | def display_hover(hoverData): 46 | if hoverData is None: 47 | return False, no_update, no_update 48 | 49 | hover_data = hoverData["points"][0] 50 | bbox = hover_data["bbox"] 51 | num = hover_data["pointNumber"] 52 | 53 | image_base64 = images[num] 54 | image_data = base64.b64decode(image_base64) 55 | image = Image.open(io.BytesIO(image_data)) 56 | width, height = image.size 57 | children = [ 58 | html.Div( 59 | [ 60 | html.Img( 61 | src=f"data:image/jpeg;base64,{image_base64}", 62 | style={ 63 | "width": f"{width}px", 64 | "height": f"{height}px", 65 | "display": "block", 66 | "margin": "0 auto", 67 | }, 68 | ), 69 | ] 70 | ) 71 | ] 72 | 73 | return True, bbox, children 74 | 75 | return app 76 | 77 | 78 | def perform_kmeans(data, k=40, feature_set="1"): 79 | # Extract x, y coordinates based on feature set 80 | if feature_set == "1": 81 | coords = data[["x1", "y1"]].to_numpy() 82 | else: 83 | coords = data[["x2", "y2"]].to_numpy() 84 | 85 | # Perform k-means clustering 86 | kmeans = KMeans(n_clusters=k, random_state=42) 87 | kmeans.fit(coords) 88 | 89 | return kmeans 90 | 91 | 92 | def perform_hdbscan(data, min_cluster_size=5, feature_set="1"): 93 | # Extract x, y coordinates based on feature set 94 | if feature_set == "1": 95 | coords = data[["x1", "y1"]].to_numpy() 96 | else: 97 | coords = data[["x2", "y2"]].to_numpy() 98 | 99 | # Perform HDBSCAN clustering 100 | hdbscan = HDBSCAN( 101 | min_cluster_size=min_cluster_size, 102 | ) 103 | hdbscan.fit(coords) 104 | 105 | return hdbscan 106 | 107 | 108 | def find_nearest_images(data, kmeans, feature_set="1"): 109 |
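    """Return, for each cluster, the image closest to the cluster centre.

    Handles both a fitted KMeans (via its cluster_centers_) and a fitted
    HDBSCAN result (by computing each cluster's centroid and skipping the
    -1 noise label). Returns (nearest_images, cluster_centers).
    """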
if feature_set == "1": 110 | coords = data[["x1", "y1"]].to_numpy() 111 | else: 112 | coords = data[["x2", "y2"]].to_numpy() 113 | images = data["image"].tolist() 114 | 115 | if isinstance(kmeans, KMeans): 116 | 117 | # Calculate distances to cluster centers 118 | distances = cdist(coords, kmeans.cluster_centers_, metric="euclidean") 119 | 120 | # Find the index of the nearest point for each cluster 121 | nearest_indices = distances.argmin(axis=0) 122 | 123 | # Get the images nearest to each cluster center 124 | nearest_images = [images[i] for i in nearest_indices] 125 | 126 | return nearest_images, kmeans.cluster_centers_ 127 | 128 | else: 129 | 130 | nearest_images = [] 131 | cluster_centers = [] 132 | 133 | # Calculate distances to cluster centers 134 | for label in np.unique(kmeans.labels_): 135 | if label == -1: # Skip noise points 136 | continue 137 | cluster_indices = np.where(kmeans.labels_ == label)[0] 138 | cluster_coords = coords[cluster_indices] 139 | 140 | # Calculate the centroid of the cluster 141 | centroid = cluster_coords.mean(axis=0) 142 | cluster_centers.append(centroid) 143 | 144 | # Find the nearest point to the centroid 145 | distances = np.linalg.norm(cluster_coords - centroid, axis=1) 146 | nearest_index = cluster_indices[np.argmin(distances)] 147 | nearest_images.append(images[nearest_index]) 148 | 149 | return nearest_images, np.array(cluster_centers) 150 | 151 | 152 | def create_dash_fig( 153 | data, kmeans_result, nearest_images, cluster_centers, title, feature_set="1" 154 | ): 155 | # Extract x, y coordinates based on feature set 156 | if feature_set == "1": 157 | x = data["x1"].tolist() 158 | y = data["y1"].tolist() 159 | else: 160 | x = data["x2"].tolist() 161 | y = data["y2"].tolist() 162 | images = data["image"].tolist() 163 | 164 | # Determine the range for both axes 165 | max_range = max(max(x) - min(x), max(y) - min(y)) / 2 166 | center_x = (max(x) + min(x)) / 2 167 | center_y = (max(y) + min(y)) / 2 168 | 169 | # Create the scatter plot 170 | fig = go.Figure() 171 | 172 | # Add data points with enhanced color scheme 173 | fig.add_trace( 174 | go.Scatter( 175 | x=x, 176 | y=y, 177 | mode="markers", 178 | marker=dict( 179 | size=5, 180 | color=kmeans_result.labels_, 181 | colorscale="hsv", 182 | showscale=True, 183 | colorbar=dict(title="style"), 184 | opacity=0.8, 185 | ), 186 | name="Data Points", 187 | ) 188 | ) 189 | 190 | # Add cluster centers 191 | fig.add_trace( 192 | go.Scatter( 193 | x=cluster_centers[:, 0], 194 | y=cluster_centers[:, 1], 195 | mode="markers", 196 | marker=dict( 197 | symbol="star", 198 | size=15, 199 | color="black", 200 | line=dict(width=2, color="DarkSlateGrey"), 201 | ), 202 | name="Cluster Centers", 203 | ) 204 | ) 205 | 206 | # Add cluster centers and images 207 | 208 | fig.update_layout( 209 | title=title, 210 | width=1000, 211 | height=1000, 212 | xaxis=dict( 213 | range=[center_x - max_range, center_x + max_range], 214 | scaleanchor="y", 215 | scaleratio=1, 216 | ), 217 | yaxis=dict( 218 | range=[center_y - max_range, center_y + max_range], 219 | ), 220 | showlegend=False, 221 | ) 222 | 223 | fig.update_traces( 224 | hoverinfo="none", 225 | hovertemplate=None, 226 | ) 227 | # Add images 228 | for i, (cx, cy) in enumerate(cluster_centers): 229 | fig.add_layout_image( 230 | dict( 231 | source=f"data:image/jpg;base64,{nearest_images[i]}", 232 | x=cx, 233 | y=cy, 234 | xref="x", 235 | yref="y", 236 | sizex=1, 237 | sizey=1, 238 | sizing="contain", 239 | opacity=1, 240 | layer="below", 241 | ) 242 | ) 243 | 244 | # Remove x 
and y axes ticks 245 | fig.update_layout(xaxis=dict(visible=False), yaxis=dict(visible=False)) 246 | 247 | return fig, images 248 | 249 | 250 | def create_multi_view_dash_app(view_data): 251 | app = Dash(__name__) 252 | 253 | app.layout = html.Div( 254 | [ 255 | html.H1("Multi-view Clustering Visualization"), 256 | html.Div( 257 | [ 258 | dcc.Tabs( 259 | id="tabs", 260 | value="tab-0", 261 | children=[ 262 | dcc.Tab(label=f"View {i+1}", value=f"tab-{i}") 263 | for i in range(len(view_data)) 264 | ], 265 | ), 266 | html.Div(id="tabs-content"), 267 | ] 268 | ), 269 | dcc.Tooltip(id="graph-tooltip", direction="bottom"), 270 | ] 271 | ) 272 | 273 | @app.callback(Output("tabs-content", "children"), Input("tabs", "value")) 274 | def render_content(tab): 275 | index = int(tab.split("-")[1]) 276 | fig, images = view_data[index] 277 | return html.Div([dcc.Graph(id="graph", figure=fig, clear_on_unhover=True)]) 278 | 279 | @app.callback( 280 | Output("graph-tooltip", "show"), 281 | Output("graph-tooltip", "bbox"), 282 | Output("graph-tooltip", "children"), 283 | Input("graph", "hoverData"), 284 | Input("tabs", "value"), 285 | ) 286 | def display_hover(hoverData, tab): 287 | if hoverData is None: 288 | return False, no_update, no_update 289 | 290 | index = int(tab.split("-")[1]) 291 | _, images = view_data[index] 292 | 293 | hover_data = hoverData["points"][0] 294 | bbox = hover_data["bbox"] 295 | num = hover_data["pointNumber"] 296 | 297 | image_base64 = images[num] 298 | image_data = base64.b64decode(image_base64) 299 | image = Image.open(io.BytesIO(image_data)) 300 | width, height = image.size 301 | children = [ 302 | html.Div( 303 | [ 304 | html.Img( 305 | src=f"data:image/jpeg;base64,{image_base64}", 306 | style={ 307 | "width": f"{width}px", 308 | "height": f"{height}px", 309 | "display": "block", 310 | "margin": "0 auto", 311 | }, 312 | ), 313 | ] 314 | ) 315 | ] 316 | 317 | return True, bbox, children 318 | 319 | return app 320 | 321 | 322 | def make_multi_view_dash( 323 | datasets, 324 | titles, 325 | params_list, 326 | args, 327 | feature_set="1", 328 | ): 329 | view_data = [] 330 | 331 | for title, params in zip(titles, params_list): 332 | datasets_df = datasets.to_table().to_pandas() 333 | 334 | feature_set = params.get("feature_set", "1") 335 | 336 | if params.get("hdbscan", False): 337 | clustering_result = perform_hdbscan( 338 | datasets_df, 339 | min_cluster_size=args.min_cluster_size, 340 | feature_set=feature_set, 341 | ) 342 | else: 343 | clustering_result = perform_kmeans( 344 | datasets_df, k=params.get("k", 40), feature_set=feature_set 345 | ) 346 | 347 | if args.output_dir: 348 | classify_images( 349 | datasets_df, 350 | clustering_result, 351 | args, 352 | os.path.join(args.output_dir, title), 353 | ) 354 | 355 | nearest_images, cluster_centers = find_nearest_images( 356 | datasets_df, clustering_result, feature_set=feature_set 357 | ) 358 | fig, images = create_dash_fig( 359 | datasets_df, 360 | clustering_result, 361 | nearest_images, 362 | cluster_centers, 363 | title, 364 | feature_set=feature_set, 365 | ) 366 | view_data.append((fig, images)) 367 | 368 | app = create_multi_view_dash_app(view_data) 369 | port = find_free_port() 370 | url = f"http://127.0.0.1:{port}/" 371 | print(f"Serving on {url}") 372 | print(f"To serve this over the Internet, run `ngrok http {port}`") 373 | webbrowser.open(url) 374 | app.run_server(port=port) 375 | return app 376 | 377 | 378 | def make_dash_kmeans(datasets, title, k=50, hdbscan=False, output_dir="output"): 379 | datasets = 
datasets.to_table().to_pandas() 380 | kmeans_result = ( 381 | perform_kmeans(datasets, k=k) 382 | if not hdbscan 383 | else perform_hdbscan(datasets, min_cluster_size=5) 384 | ) 385 | if output_dir: 386 | classify_images(datasets, kmeans_result, output_dir) 387 | nearest_images, cluster_centers = find_nearest_images(datasets, kmeans_result) 388 | fig, images = create_dash_fig( 389 | datasets, kmeans_result, nearest_images, cluster_centers, title 390 | ) 391 | app = create_dash_app(fig, images) 392 | port = find_free_port() 393 | url = f"http://127.0.0.1:{port}/" 394 | print(f"Serving on {url}") 395 | print(f"To serve this over the Internet, run `ngrok http {port}`") 396 | webbrowser.open(url) 397 | app.run_server(port=port) 398 | return app 399 | -------------------------------------------------------------------------------- /datasets.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | from torch.utils.data import Dataset 4 | from PIL import Image 5 | import lance 6 | 7 | class CustomDataset(Dataset): 8 | def __init__(self, image_or_lance_path, transform=None): 9 | self.ds = lance.dataset(image_or_lance_path) 10 | self.transform = transform 11 | 12 | def __len__(self): 13 | return self.ds.count_rows() 14 | 15 | def load_image(self, idx): 16 | raw_img = self.ds.take([idx], columns=["image"]).to_pydict() 17 | img = Image.open(io.BytesIO(raw_img["image"][0])) 18 | if img.mode != "RGB": 19 | img = img.convert("RGB") 20 | if self.transform: 21 | img = self.transform(img) 22 | return img 23 | 24 | def load_path(self, idx): 25 | filename = self.ds.take([idx], columns=["filename"]).to_pydict() 26 | return filename["filename"][0] 27 | 28 | def __getitem__(self, idx): 29 | path = self.load_path(idx) 30 | img = self.load_image(idx) 31 | return path, img 32 | -------------------------------------------------------------------------------- /datasets/put images here: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lancedatasets.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from PIL import Image 4 | import hashlib 5 | 6 | import lance 7 | import pyarrow as pa 8 | 9 | from tqdm.auto import tqdm 10 | 11 | IMAGE_EXTENSIONS = [ 12 | ".png", 13 | ".jpg", 14 | ".jpeg", 15 | ".webp", 16 | ".bmp", 17 | ".PNG", 18 | ".JPG", 19 | ".JPEG", 20 | ".WEBP", 21 | ".BMP", 22 | ] 23 | 24 | try: 25 | import pillow_avif 26 | 27 | IMAGE_EXTENSIONS.extend([".avif", ".AVIF"]) 28 | except ImportError: 29 | pass 30 | 31 | # JPEG-XL on Linux 32 | try: 33 | from jxlpy import JXLImagePlugin 34 | 35 | IMAGE_EXTENSIONS.extend([".jxl", ".JXL"]) 36 | except ImportError: 37 | pass 38 | 39 | # JPEG-XL on Windows 40 | try: 41 | import pillow_jxl 42 | 43 | IMAGE_EXTENSIONS.extend([".jxl", ".JXL"]) 44 | except ImportError: 45 | pass 46 | 47 | # Convert IMAGE_EXTENSIONS to a tuple 48 | IMAGE_EXTENSIONS = tuple(IMAGE_EXTENSIONS) 49 | 50 | 51 | def load_data(images_dir, texts_dir): 52 | data = [] 53 | if texts_dir: 54 | images = sorted(os.listdir(images_dir)) 55 | texts = sorted(os.listdir(texts_dir)) 56 | 57 | for image_file, text_file in zip(images, texts): 58 | if image_file.endswith(IMAGE_EXTENSIONS) and text_file.endswith(".txt"): 59 | with open( 60 | os.path.join(texts_dir, text_file), "r", encoding="utf-8" 61 | ) as file: 62 | caption = file.read().strip() 63 | data.append( 64 | { 65 | "image_path": os.path.join(images_dir,
--------------------------------------------------------------------------------
/datasets/put images here:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/lancedatasets.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | from PIL import Image
4 | import hashlib
5 | 
6 | import lance
7 | import pyarrow as pa
8 | 
9 | from tqdm.auto import tqdm
10 | 
11 | IMAGE_EXTENSIONS = [
12 |     ".png",
13 |     ".jpg",
14 |     ".jpeg",
15 |     ".webp",
16 |     ".bmp",
17 |     ".PNG",
18 |     ".JPG",
19 |     ".JPEG",
20 |     ".WEBP",
21 |     ".BMP",
22 | ]
23 | 
24 | try:
25 |     import pillow_avif
26 | 
27 |     IMAGE_EXTENSIONS.extend([".avif", ".AVIF"])
28 | except ImportError:
29 |     pass
30 | 
31 | # JPEG-XL on Linux
32 | try:
33 |     from jxlpy import JXLImagePlugin
34 | 
35 |     IMAGE_EXTENSIONS.extend([".jxl", ".JXL"])
36 | except ImportError:
37 |     pass
38 | 
39 | # JPEG-XL on Windows
40 | try:
41 |     import pillow_jxl
42 | 
43 |     IMAGE_EXTENSIONS.extend([".jxl", ".JXL"])
44 | except ImportError:
45 |     pass
46 | 
47 | # Convert IMAGE_EXTENSIONS to a tuple so it can be passed to str.endswith()
48 | IMAGE_EXTENSIONS = tuple(IMAGE_EXTENSIONS)
49 | 
50 | 
51 | def load_data(images_dir, texts_dir):
52 |     data = []
53 |     if texts_dir:
54 |         images = sorted(os.listdir(images_dir))
55 |         texts = sorted(os.listdir(texts_dir))
56 | 
57 |         for image_file, text_file in zip(images, texts):
58 |             if image_file.endswith(IMAGE_EXTENSIONS) and text_file.endswith(".txt"):
59 |                 with open(
60 |                     os.path.join(texts_dir, text_file), "r", encoding="utf-8"
61 |                 ) as file:
62 |                     caption = file.read().strip()
63 |                 data.append(
64 |                     {
65 |                         "image_path": os.path.join(images_dir, image_file),
66 |                         "caption": caption,
67 |                     }
68 |                 )
69 | 
70 |     else:
71 |         for root, dirs, files in os.walk(images_dir):
72 |             for image_file in files:
73 |                 if image_file.endswith(IMAGE_EXTENSIONS):
74 |                     text_file = os.path.splitext(image_file)[0] + ".txt"
75 |                     text_path = os.path.join(root, text_file)
76 |                     if os.path.exists(text_path):
77 |                         with open(text_path, "r", encoding="utf-8") as file:
78 |                             caption = file.read().strip()
79 | 
80 |                         data.append(
81 |                             {
82 |                                 "image_path": os.path.join(root, image_file),
83 |                                 "caption": caption,
84 |                             }
85 |                         )
86 |                     else:
87 |                         data.append(
88 |                             {
89 |                                 "image_path": os.path.join(root, image_file),
90 |                                 "caption": "",
91 |                             }
92 |                         )
93 |     return data
94 | 
95 | 
96 | def process(data, only_save_path: bool = False):
97 |     for item in tqdm(data):
98 |         image_path = item["image_path"]
99 |         caption = item["caption"]
100 |         print(f"Processing image '{image_path}'...")
101 |         print(f"Caption: {caption}")
102 |         try:
103 |             with open(image_path, "rb") as im:
104 |                 binary_im = im.read()
105 |             image_hash = hashlib.sha256(binary_im).hexdigest()
106 |             img = Image.open(image_path)
107 |             width, height = img.size
108 |             image_size = os.path.getsize(image_path)
109 |         except FileNotFoundError:
110 |             print(
111 |                 f"Image '{os.path.basename(image_path)}' not found in the folder, skipping."
112 |             )
113 |             continue
114 |         except (IOError, SyntaxError) as e:
115 |             print(
116 |                 f"Error opening image '{os.path.basename(image_path)}': {str(e)}. Skipping the file."
117 |             )
118 |             continue
119 | 
120 |         print(f"Image '{image_path}' processed successfully.")
121 |         if only_save_path:
122 |             binary_im = b""
123 | 
124 |         filename = pa.array([os.path.abspath(image_path)], type=pa.string())
125 |         extension = pa.array(
126 |             [os.path.splitext(os.path.basename(image_path))[1]], type=pa.string()
127 |         )
128 |         width = pa.array([int(width)], type=pa.int32())
129 |         height = pa.array([int(height)], type=pa.int32())
130 |         size = pa.array([image_size], type=pa.int64())
131 |         hash = pa.array([image_hash], type=pa.string())
132 |         img = pa.array([binary_im], type=pa.binary())
133 |         captions = pa.array([caption], type=pa.string())
134 | 
135 |         yield pa.RecordBatch.from_arrays(
136 |             [filename, extension, hash, size, width, height, img, captions],
137 |             [
138 |                 "filename",
139 |                 "extension",
140 |                 "hash",
141 |                 "size",
142 |                 "width",
143 |                 "height",
144 |                 "image",
145 |                 "captions",
146 |             ],
147 |         )
148 | 
149 | 
150 | def transform2lance(
151 |     train_data_dir, caption_dir=None, output_name="datasets", only_save_path: bool = False
152 | ):
153 | 
154 |     data = load_data(train_data_dir, caption_dir)
155 | 
156 |     schema = pa.schema(
157 |         [
158 |             pa.field("filename", pa.string()),
159 |             pa.field("extension", pa.string()),
160 |             pa.field("hash", pa.string()),
161 |             pa.field("size", pa.int64()),
162 |             pa.field("width", pa.int32()),
163 |             pa.field("height", pa.int32()),
164 |             pa.field("image", pa.binary()),
165 |             pa.field("captions", pa.string()),
166 |         ]
167 |     )
168 |     try:
169 |         reader = pa.RecordBatchReader.from_batches(schema, process(data, only_save_path))
170 |         lance.write_dataset(reader, output_name + ".lance", schema)
171 |     except AttributeError as e:
172 |         print(f"AttributeError: {e}")
173 | 
174 | 
175 | def setup_parser() -> argparse.ArgumentParser:
176 |     parser = argparse.ArgumentParser()
177 | 
178 |     parser.add_argument("train_data_dir", type=str, help="directory for train images")
179 |     parser.add_argument(
180 |         "--captions_dir", type=str, default=None, help="directory for caption .txt files"
181 |     )
182 |     parser.add_argument(
183 |         "--output_name", type=str, default="datasets", help="name of the output .lance dataset"
184 |     )
185 |     parser.add_argument(
186 |         "--only_save_path", action="store_true", help="only save the path of images"
187 |     )
188 |     return parser
189 | 
190 | 
191 | if __name__ == "__main__":
192 |     parser = setup_parser()
193 | 
194 |     args = parser.parse_args()
195 | 
196 |     transform2lance(
197 |         args.train_data_dir, args.captions_dir, args.output_name, args.only_save_path
198 |     )
199 | 
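A minimal end-to-end sketch (editorial addition; "./my_images" and "my_datasets"
are placeholder names): building a .lance dataset with transform2lance above and
reading one row back through pylance.

    import lance
    from lancedatasets import transform2lance

    transform2lance("./my_images", output_name="my_datasets")  # writes my_datasets.lance

    ds = lance.dataset("my_datasets.lance")
    print(ds.count_rows())
    row = ds.take([0], columns=["filename", "width", "height", "captions"]).to_pydict()
    print(row)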
"--model_name", 105 | type=str, 106 | default="yuxi-liu-wired/CSD", 107 | help="Name of the pretrained model", 108 | ) 109 | parser.add_argument( 110 | "--processor_name", 111 | type=str, 112 | default="openai/clip-vit-large-patch14", 113 | help="Name of the processor", 114 | ) 115 | parser.add_argument( 116 | "--batch_size", type=int, default=12, help="Batch size for data loading" 117 | ) 118 | parser.add_argument( 119 | "--num_workers", type=int, default=0, help="Number of workers for data loading" 120 | ) 121 | parser.add_argument( 122 | "--k_clusters", type=int, default=40, help="Number of clusters for KMeans" 123 | ) 124 | parser.add_argument( 125 | "--min_cluster_size", 126 | type=int, 127 | default=10, 128 | help="smaller size get more clusters for HDBSCAN", 129 | ) 130 | parser.add_argument( 131 | "--output_dir", 132 | type=str, 133 | default="output", 134 | help="Output directory for the classified images", 135 | ) 136 | parser.add_argument( 137 | "--symlink", 138 | action="store_true", 139 | help="Create symlinks instead of copying images", 140 | ) 141 | 142 | args = parser.parse_args() 143 | 144 | if not os.path.exists(args.dataset_path): 145 | transform2lance(args.train_data_dir) 146 | 147 | device = "cuda" if torch.cuda.is_available() else "cpu" 148 | model = CSD_CLIP.from_pretrained(args.model_name) 149 | model.to(device) 150 | 151 | processor = CLIPProcessor.from_pretrained(args.processor_name) 152 | pipeline = CSDCLIPPipeline(model=model, processor=processor, device=device) 153 | 154 | dataset = CustomDataset(args.dataset_path) 155 | 156 | if os.path.exists(args.embeddings_path): 157 | embeddingslance = lance.dataset(args.embeddings_path) 158 | else: 159 | dataloader = torch.utils.data.DataLoader( 160 | dataset, 161 | batch_size=args.batch_size, 162 | num_workers=args.num_workers, 163 | collate_fn=collate_fn_remove_corrupted, 164 | drop_last=False, 165 | pin_memory=True, 166 | ) 167 | 168 | style_embeddings = [] 169 | content_embeddings = [] 170 | imagelist = [] 171 | pathlist = [] 172 | with Progress() as progress: 173 | task = progress.add_task( 174 | "[green]Generating embeddings...", total=len(dataloader) 175 | ) 176 | for data in dataloader: 177 | for path, image in data: 178 | image = preprocess_image(image) 179 | outputs = pipeline(image) 180 | style_outputs = outputs["style_output"].squeeze(0) 181 | content_outputs = outputs["content_output"].squeeze(0) 182 | style_embeddings.append(style_outputs) 183 | content_embeddings.append(content_outputs) 184 | buffer = io.BytesIO() 185 | image = resize_and_remove_borders(image) 186 | image.save(buffer, format="JPEG") 187 | image_bytes = base64.b64encode(buffer.getvalue()).decode("utf-8") 188 | pathlist.append(path) 189 | imagelist.append(image_bytes) 190 | progress.update(task, advance=1) 191 | 192 | print("Embeddings generated successfully!") 193 | print("Saving embeddings to disk...") 194 | reducer = umap.UMAP( 195 | n_components=2, 196 | metric="cosine", 197 | random_state=42, 198 | ) 199 | style_umap_results = reducer.fit_transform(np.array(style_embeddings)) 200 | content_umap_results = reducer.fit_transform(np.array(content_embeddings)) 201 | 202 | new_data = pa.table( 203 | { 204 | "path": pa.array(pathlist), 205 | "image": pa.array(imagelist), 206 | "x1": pa.array(style_umap_results[:, 0]), 207 | "y1": pa.array(style_umap_results[:, 1]), 208 | "x2": pa.array(content_umap_results[:, 0]), 209 | "y2": pa.array(content_umap_results[:, 1]), 210 | } 211 | ) 212 | 213 | embeddingslance = lance.write_dataset(new_data, 
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import torch.nn as nn
3 | import clip
4 | from huggingface_hub import PyTorchModelHubMixin
5 | from transformers import PretrainedConfig
6 | 
7 | class CSDCLIPConfig(PretrainedConfig):
8 |     model_type = "csd_clip"
9 | 
10 |     def __init__(
11 |         self,
12 |         name="csd_large",
13 |         embedding_dim=1024,
14 |         feature_dim=1024,
15 |         content_dim=768,
16 |         style_dim=768,
17 |         content_proj_head="default",
18 |         **kwargs
19 |     ):
20 |         super().__init__(**kwargs)
21 |         self.name = name
22 |         self.embedding_dim = embedding_dim
23 |         self.feature_dim = feature_dim
24 |         self.content_dim = content_dim
25 |         self.style_dim = style_dim
26 |         self.content_proj_head = content_proj_head
27 |         self.task_specific_params = None  # explicitly reset so config serialization stays clean
28 | 
29 | class CSD_CLIP(nn.Module, PyTorchModelHubMixin):
30 |     """backbone + projection head"""
31 |     def __init__(self, name='vit_large', content_proj_head='default'):
32 |         super(CSD_CLIP, self).__init__()
33 |         self.content_proj_head = content_proj_head
34 |         if name == 'vit_large':
35 |             clipmodel, _ = clip.load("ViT-L/14")
36 |             self.backbone = clipmodel.visual
37 |             self.embedding_dim = 1024
38 |             self.feature_dim = 1024
39 |             self.content_dim = 768
40 |             self.style_dim = 768
41 |             self.name = "csd_large"
42 |         elif name == 'vit_base':
43 |             clipmodel, _ = clip.load("ViT-B/16")
44 |             self.backbone = clipmodel.visual
45 |             self.embedding_dim = 768
46 |             self.feature_dim = 512
47 |             self.content_dim = 512
48 |             self.style_dim = 512
49 |             self.name = "csd_base"
50 |         else:
51 |             raise Exception('This model is not implemented')
52 | 
53 |         self.last_layer_style = copy.deepcopy(self.backbone.proj)
54 |         self.last_layer_content = copy.deepcopy(self.backbone.proj)
55 | 
56 |         self.backbone.proj = None
57 | 
58 |         self.config = CSDCLIPConfig(
59 |             name=self.name,
60 |             embedding_dim=self.embedding_dim,
61 |             feature_dim=self.feature_dim,
62 |             content_dim=self.content_dim,
63 |             style_dim=self.style_dim,
64 |             content_proj_head=self.content_proj_head
65 |         )
66 | 
67 |     def get_config(self):
68 |         return self.config.to_dict()
69 | 
70 |     @property
71 |     def dtype(self):
72 |         return self.backbone.conv1.weight.dtype
73 | 
74 |     @property
75 |     def device(self):
76 |         return next(self.parameters()).device
77 | 
78 |     def forward(self, input_data):
79 | 
80 |         feature = self.backbone(input_data)
81 | 
82 |         style_output = feature @ self.last_layer_style
83 |         style_output = nn.functional.normalize(style_output, dim=1, p=2)
84 | 
85 |         content_output = feature @ self.last_layer_content
86 |         content_output = nn.functional.normalize(content_output, dim=1, p=2)
87 | 
88 |         return feature, content_output, style_output
89 | 
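A quick smoke test for CSD_CLIP (editorial sketch; from_pretrained downloads the
checkpoint and clip.load fetches the ViT-L/14 weights on first use, and the random
tensor merely stands in for a preprocessed 224x224 image batch):

    import torch
    from model import CSD_CLIP

    model = CSD_CLIP.from_pretrained("yuxi-liu-wired/CSD")
    model.eval()
    dummy = torch.randn(1, 3, 224, 224, device=model.device, dtype=model.dtype)
    with torch.no_grad():
        feature, content, style = model(dummy)
    # expected shapes: feature (1, 1024), content (1, 768), style (1, 768)
    print(feature.shape, content.shape, style.shape)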
--------------------------------------------------------------------------------
/pipeline.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from transformers import Pipeline
3 | from typing import Union, List
4 | from PIL import Image
5 | 
6 | class CSDCLIPPipeline(Pipeline):
7 |     def __init__(self, model, processor, device=None):
8 |         if device is None:
9 |             device = "cuda" if torch.cuda.is_available() else "cpu"
10 |         super().__init__(model=model, tokenizer=None, device=device)
11 |         self.processor = processor
12 | 
13 |     def _sanitize_parameters(self, **kwargs):
14 |         return {}, {}, {}
15 | 
16 |     def preprocess(self, images):
17 |         if isinstance(images, (str, Image.Image)):
18 |             images = [images]
19 | 
20 |         processed = self.processor(images=images, return_tensors="pt", padding=True, truncation=True)
21 |         return {k: v.to(self.device) for k, v in processed.items()}
22 | 
23 |     def _forward(self, model_inputs):
24 |         pixel_values = model_inputs['pixel_values'].to(self.model.dtype)
25 |         with torch.no_grad():
26 |             features, content_output, style_output = self.model(pixel_values)
27 |         return {"features": features, "content_output": content_output, "style_output": style_output}
28 | 
29 |     def postprocess(self, model_outputs):
30 |         return {
31 |             "features": model_outputs["features"].cpu().numpy(),
32 |             "content_output": model_outputs["content_output"].cpu().numpy(),
33 |             "style_output": model_outputs["style_output"].cpu().numpy()
34 |         }
35 | 
36 |     def __call__(self, images: Union[str, List[str], Image.Image, List[Image.Image]]):
37 |         return super().__call__(images)
38 | 
--------------------------------------------------------------------------------
/process_image.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | from sklearn.cluster import KMeans
4 | from hdbscan import HDBSCAN
5 | 
6 | def classify_images(data, kmeans_result, args, output_dir):
7 | 
8 |     # Create output directory if it doesn't exist
9 |     if not os.path.exists(output_dir):
10 |         os.makedirs(output_dir)
11 | 
12 |     # Get cluster labels and image data
13 |     labels = kmeans_result.labels_
14 |     paths = data["path"].tolist()
15 | 
16 |     # Get the number of clusters
17 |     if isinstance(kmeans_result, KMeans):
18 |         n_clusters = kmeans_result.n_clusters
19 |     else:  # HDBSCAN
20 |         n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
21 | 
22 |     # Create a subdirectory for each cluster
23 |     for i in range(n_clusters):
24 |         cluster_dir = os.path.join(output_dir, f"class_{i}")
25 |         if not os.path.exists(cluster_dir):
26 |             os.makedirs(cluster_dir)
27 | 
28 |     # Create a directory for noise points (only for HDBSCAN)
29 |     if isinstance(kmeans_result, HDBSCAN):
30 |         noise_dir = os.path.join(output_dir, "noise")
31 |         if not os.path.exists(noise_dir):
32 |             os.makedirs(noise_dir)
33 | 
34 |     # Copy images to their respective cluster directories
35 |     for i, (label, image_abs_path) in enumerate(zip(labels, paths)):
36 |         if label == -1:  # Noise point (HDBSCAN only)
37 |             target_dir = noise_dir
38 |         else:
39 |             target_dir = os.path.join(output_dir, f"class_{label}")
40 | 
41 |         image_path = os.path.join(target_dir, f"image_{i}{os.path.splitext(image_abs_path)[1]}")
42 | 
43 |         # Copy (or symlink) the image into the target directory
44 |         if not os.path.exists(image_abs_path):
45 |             print(f"Warning: {image_abs_path} does not exist and will be skipped.")
46 |         elif os.path.exists(image_path):
47 |             print(f"Warning: {image_path} already exists and will be skipped.")
48 |         elif getattr(args, "symlink", False):
49 |             # --symlink requests symlinks instead of copies
50 |             os.symlink(image_abs_path, image_path)
51 |         else:
52 |             shutil.copy(image_abs_path, image_path)
53 | 
54 |     print(f"Images have been classified and saved to {output_dir}")
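An illustrative call to classify_images on its own (editorial sketch; the two fake
paths and SimpleNamespace stand in for real image paths and the parsed CLI args,
and the DataFrame columns match what dash_page.py passes in):

    from types import SimpleNamespace

    import pandas as pd
    from sklearn.cluster import KMeans
    from process_image import classify_images

    df = pd.DataFrame(
        {"path": ["/abs/a.jpg", "/abs/b.jpg"], "x1": [0.0, 1.0], "y1": [0.0, 1.0]}
    )
    km = KMeans(n_clusters=2, n_init="auto").fit(df[["x1", "y1"]])
    args = SimpleNamespace(symlink=False)
    classify_images(df, km, args, "output/demo")  # missing files just print warnings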
-------------------------------------------------------------------------------- /requirements-uv.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv pip compile .\requirements.txt -o requirements-uv.txt -i https://pypi.org/simple --index-strategy unsafe-best-match 3 | aiohappyeyeballs==2.4.0 4 | # via aiohttp 5 | aiohttp==3.10.5 6 | # via 7 | # datasets 8 | # fsspec 9 | aiosignal==1.3.1 10 | # via aiohttp 11 | async-timeout==4.0.3 12 | # via aiohttp 13 | attrs==24.2.0 14 | # via aiohttp 15 | blinker==1.8.2 16 | # via flask 17 | certifi==2022.12.7 18 | # via requests 19 | charset-normalizer==2.1.1 20 | # via requests 21 | click==8.1.7 22 | # via flask 23 | clip @ git+https://github.com/openai/CLIP.git@dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1 24 | # via -r ./requirements.txt 25 | colorama==0.4.6 26 | # via 27 | # click 28 | # tqdm 29 | cython==0.29.37 30 | # via hdbscan 31 | dash==2.18.1 32 | # via -r ./requirements.txt 33 | dash-core-components==2.0.0 34 | # via dash 35 | dash-html-components==2.0.0 36 | # via dash 37 | dash-table==5.0.0 38 | # via dash 39 | datasets==2.19.1 40 | # via -r ./requirements.txt 41 | dill==0.3.8 42 | # via 43 | # datasets 44 | # multiprocess 45 | filelock==3.13.1 46 | # via 47 | # datasets 48 | # huggingface-hub 49 | # torch 50 | # transformers 51 | flask==3.0.3 52 | # via dash 53 | frozenlist==1.4.1 54 | # via 55 | # aiohttp 56 | # aiosignal 57 | fsspec==2024.2.0 58 | # via 59 | # datasets 60 | # huggingface-hub 61 | # torch 62 | ftfy==6.2.3 63 | # via clip 64 | hdbscan==0.8.37 65 | # via -r ./requirements.txt 66 | huggingface-hub==0.25.0 67 | # via 68 | # datasets 69 | # tokenizers 70 | # transformers 71 | idna==3.4 72 | # via 73 | # requests 74 | # yarl 75 | importlib-metadata==8.5.0 76 | # via dash 77 | itsdangerous==2.2.0 78 | # via flask 79 | jinja2==3.1.3 80 | # via 81 | # flask 82 | # torch 83 | joblib==1.4.2 84 | # via 85 | # hdbscan 86 | # pynndescent 87 | # scikit-learn 88 | llvmlite==0.43.0 89 | # via 90 | # numba 91 | # pynndescent 92 | markdown-it-py==3.0.0 93 | # via rich 94 | markupsafe==2.1.5 95 | # via 96 | # jinja2 97 | # werkzeug 98 | mdurl==0.1.2 99 | # via markdown-it-py 100 | mpmath==1.3.0 101 | # via sympy 102 | multidict==6.1.0 103 | # via 104 | # aiohttp 105 | # yarl 106 | multiprocess==0.70.16 107 | # via datasets 108 | nest-asyncio==1.6.0 109 | # via dash 110 | networkx==3.2.1 111 | # via torch 112 | numba==0.60.0 113 | # via 114 | # pynndescent 115 | # umap-learn 116 | numpy==1.26.3 117 | # via 118 | # -r ./requirements.txt 119 | # datasets 120 | # hdbscan 121 | # numba 122 | # pandas 123 | # pyarrow 124 | # pylance 125 | # scikit-learn 126 | # scipy 127 | # torchvision 128 | # transformers 129 | # umap-learn 130 | packaging==22.0 131 | # via 132 | # clip 133 | # datasets 134 | # huggingface-hub 135 | # plotly 136 | # transformers 137 | pandas==2.2.3 138 | # via 139 | # -r ./requirements.txt 140 | # datasets 141 | pillow==10.2.0 142 | # via 143 | # -r ./requirements.txt 144 | # torchvision 145 | plotly==5.24.1 146 | # via 147 | # -r ./requirements.txt 148 | # dash 149 | pyarrow==17.0.0 150 | # via 151 | # -r ./requirements.txt 152 | # datasets 153 | # pylance 154 | pyarrow-hotfix==0.6 155 | # via datasets 156 | pygments==2.18.0 157 | # via rich 158 | pylance==0.18.0 159 | # via -r ./requirements.txt 160 | pynndescent==0.5.13 161 | # via umap-learn 162 | python-dateutil==2.9.0.post0 163 | # via pandas 164 | pytz==2024.2 165 | 
    # via pandas
166 | pyyaml==6.0.2
167 |     # via
168 |     #   datasets
169 |     #   huggingface-hub
170 |     #   transformers
171 | regex==2024.9.11
172 |     # via
173 |     #   clip
174 |     #   transformers
175 | requests==2.28.1
176 |     # via
177 |     #   dash
178 |     #   datasets
179 |     #   huggingface-hub
180 |     #   transformers
181 | retrying==1.3.4
182 |     # via dash
183 | rich==13.8.1
184 |     # via -r ./requirements.txt
185 | safetensors==0.4.5
186 |     # via transformers
187 | scikit-learn==1.5.2
188 |     # via
189 |     #   -r ./requirements.txt
190 |     #   hdbscan
191 |     #   pynndescent
192 |     #   umap-learn
193 | scipy==1.14.1
194 |     # via
195 |     #   hdbscan
196 |     #   pynndescent
197 |     #   scikit-learn
198 |     #   umap-learn
199 | setuptools==70.0.0
200 |     # via dash
201 | six==1.16.0
202 |     # via
203 |     #   python-dateutil
204 |     #   retrying
205 | sympy==1.12
206 |     # via torch
207 | tenacity==9.0.0
208 |     # via plotly
209 | threadpoolctl==3.5.0
210 |     # via scikit-learn
211 | tokenizers==0.19.1
212 |     # via transformers
213 | torch==2.4.1+cu124
214 |     # via
215 |     #   -r ./requirements.txt
216 |     #   clip
217 |     #   torchvision
218 | torchvision==0.19.1+cu124
219 |     # via clip
220 | tqdm==4.64.1
221 |     # via
222 |     #   -r ./requirements.txt
223 |     #   clip
224 |     #   datasets
225 |     #   huggingface-hub
226 |     #   transformers
227 |     #   umap-learn
228 | transformers==4.44.2
229 |     # via -r ./requirements.txt
230 | typing-extensions==4.9.0
231 |     # via
232 |     #   dash
233 |     #   huggingface-hub
234 |     #   multidict
235 |     #   torch
236 | tzdata==2024.1
237 |     # via pandas
238 | umap-learn==0.5.6
239 |     # via -r ./requirements.txt
240 | urllib3==1.26.13
241 |     # via requests
242 | wcwidth==0.2.13
243 |     # via ftfy
244 | werkzeug==3.0.4
245 |     # via
246 |     #   dash
247 |     #   flask
248 | xxhash==3.5.0
249 |     # via datasets
250 | yarl==1.11.1
251 |     # via aiohttp
252 | zipp==3.20.2
253 |     # via importlib-metadata
254 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | numpy
3 | scikit-learn
4 | pillow
5 | datasets
6 | tqdm
7 | clip @ git+https://github.com/openai/CLIP.git@main
8 | torch
9 | pylance
10 | pyarrow
11 | plotly
12 | dash
13 | rich
14 | transformers
15 | umap-learn
16 | hdbscan
--------------------------------------------------------------------------------
/transfromer2lance.ps1:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | $train_data_dir = "./2b" # input images path
4 | $output_name = "2b"
5 | 
6 | accelerate launch --num_cpu_threads_per_process=8 "./lancedatasets.py" `
7 |     $train_data_dir `
8 |     --output_name=$output_name
--------------------------------------------------------------------------------
/uv-installer.ps1:
--------------------------------------------------------------------------------
1 | # Licensed under the MIT license
2 | # <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
3 | # option. This file may not be copied, modified, or distributed
4 | # except according to those terms.
5 | 6 | <# 7 | .SYNOPSIS 8 | 9 | The installer for uv 0.4.21 10 | 11 | .DESCRIPTION 12 | 13 | This script detects what platform you're on and fetches an appropriate archive from 14 | https://github.com/astral-sh/uv/releases/download/0.4.21 15 | then unpacks the binaries and installs them to 16 | 17 | $env:CARGO_HOME/bin (or $HOME/.cargo/bin) 18 | 19 | It will then add that dir to PATH by editing your Environment.Path registry key 20 | 21 | .PARAMETER ArtifactDownloadUrl 22 | The URL of the directory where artifacts can be fetched from 23 | 24 | .PARAMETER NoModifyPath 25 | Don't add the install directory to PATH 26 | 27 | .PARAMETER Help 28 | Print help 29 | 30 | #> 31 | 32 | param ( 33 | [Parameter(HelpMessage = "The URL of the directory where artifacts can be fetched from")] 34 | [string]$ArtifactDownloadUrl = 'https://github.com/astral-sh/uv/releases/download/0.4.21', 35 | [Parameter(HelpMessage = "Don't add the install directory to PATH")] 36 | [switch]$NoModifyPath, 37 | [Parameter(HelpMessage = "Print Help")] 38 | [switch]$Help 39 | ) 40 | 41 | $app_name = 'uv' 42 | $app_version = '0.4.21' 43 | 44 | $receipt = @" 45 | {"binaries":["CARGO_DIST_BINS"],"binary_aliases":{},"cdylibs":["CARGO_DIST_DYLIBS"],"cstaticlibs":["CARGO_DIST_STATICLIBS"],"install_prefix":"AXO_INSTALL_PREFIX","provider":{"source":"cargo-dist","version":"0.22.1"},"source":{"app_name":"uv","name":"uv","owner":"astral-sh","release_type":"github"},"version":"0.4.21"} 46 | "@ 47 | $receipt_home = "${env:LOCALAPPDATA}\uv" 48 | 49 | function Install-Binary($install_args) { 50 | if ($Help) { 51 | Get-Help $PSCommandPath -Detailed 52 | Exit 53 | } 54 | 55 | Initialize-Environment 56 | 57 | # Platform info injected by cargo-dist 58 | $platforms = @{ 59 | "aarch64-pc-windows-msvc" = @{ 60 | "artifact_name" = "uv-x86_64-pc-windows-msvc.zip" 61 | "bins" = @("uv.exe", "uvx.exe") 62 | "libs" = @() 63 | "staticlibs" = @() 64 | "zip_ext" = ".zip" 65 | "aliases" = @{ 66 | } 67 | "aliases_json" = '{}' 68 | } 69 | "i686-pc-windows-msvc" = @{ 70 | "artifact_name" = "uv-i686-pc-windows-msvc.zip" 71 | "bins" = @("uv.exe", "uvx.exe") 72 | "libs" = @() 73 | "staticlibs" = @() 74 | "zip_ext" = ".zip" 75 | "aliases" = @{ 76 | } 77 | "aliases_json" = '{}' 78 | } 79 | "x86_64-pc-windows-msvc" = @{ 80 | "artifact_name" = "uv-x86_64-pc-windows-msvc.zip" 81 | "bins" = @("uv.exe", "uvx.exe") 82 | "libs" = @() 83 | "staticlibs" = @() 84 | "zip_ext" = ".zip" 85 | "aliases" = @{ 86 | } 87 | "aliases_json" = '{}' 88 | } 89 | } 90 | 91 | $fetched = Download "$ArtifactDownloadUrl" $platforms 92 | # FIXME: add a flag that lets the user not do this step 93 | try { 94 | Invoke-Installer -artifacts $fetched -platforms $platforms "$install_args" 95 | } catch { 96 | throw @" 97 | We encountered an error trying to perform the installation; 98 | please review the error messages below. 99 | 100 | $_ 101 | "@ 102 | } 103 | } 104 | 105 | function Get-TargetTriple() { 106 | try { 107 | # NOTE: this might return X64 on ARM64 Windows, which is OK since emulation is available. 108 | # It works correctly starting in PowerShell Core 7.3 and Windows PowerShell in Win 11 22H2. 109 | # Ideally this would just be 110 | # [System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture 111 | # but that gets a type from the wrong assembly on Windows PowerShell (i.e. 
not Core) 112 | $a = [System.Reflection.Assembly]::LoadWithPartialName("System.Runtime.InteropServices.RuntimeInformation") 113 | $t = $a.GetType("System.Runtime.InteropServices.RuntimeInformation") 114 | $p = $t.GetProperty("OSArchitecture") 115 | # Possible OSArchitecture Values: https://learn.microsoft.com/dotnet/api/system.runtime.interopservices.architecture 116 | # Rust supported platforms: https://doc.rust-lang.org/stable/rustc/platform-support.html 117 | switch ($p.GetValue($null).ToString()) 118 | { 119 | "X86" { return "i686-pc-windows-msvc" } 120 | "X64" { return "x86_64-pc-windows-msvc" } 121 | "Arm" { return "thumbv7a-pc-windows-msvc" } 122 | "Arm64" { return "aarch64-pc-windows-msvc" } 123 | } 124 | } catch { 125 | # The above was added in .NET 4.7.1, so Windows PowerShell in versions of Windows 126 | # prior to Windows 10 v1709 may not have this API. 127 | Write-Verbose "Get-TargetTriple: Exception when trying to determine OS architecture." 128 | Write-Verbose $_ 129 | } 130 | 131 | # This is available in .NET 4.0. We already checked for PS 5, which requires .NET 4.5. 132 | Write-Verbose("Get-TargetTriple: falling back to Is64BitOperatingSystem.") 133 | if ([System.Environment]::Is64BitOperatingSystem) { 134 | return "x86_64-pc-windows-msvc" 135 | } else { 136 | return "i686-pc-windows-msvc" 137 | } 138 | } 139 | 140 | function Download($download_url, $platforms) { 141 | $arch = Get-TargetTriple 142 | 143 | if (-not $platforms.ContainsKey($arch)) { 144 | $platforms_json = ConvertTo-Json $platforms 145 | throw "ERROR: could not find binaries for this platform. Last platform tried: $arch platform info: $platforms_json" 146 | } 147 | 148 | # Lookup what we expect this platform to look like 149 | $info = $platforms[$arch] 150 | $zip_ext = $info["zip_ext"] 151 | $bin_names = $info["bins"] 152 | $lib_names = $info["libs"] 153 | $staticlib_names = $info["staticlibs"] 154 | $artifact_name = $info["artifact_name"] 155 | 156 | # Make a new temp dir to unpack things to 157 | $tmp = New-Temp-Dir 158 | $dir_path = "$tmp\$app_name$zip_ext" 159 | 160 | # Download and unpack! 161 | $url = "$download_url/$artifact_name" 162 | Write-Information "Downloading $app_name $app_version ($arch)" 163 | Write-Verbose " from $url" 164 | Write-Verbose " to $dir_path" 165 | $wc = New-Object Net.Webclient 166 | $wc.downloadFile($url, $dir_path) 167 | 168 | Write-Verbose "Unpacking to $tmp" 169 | 170 | # Select the tool to unpack the files with. 171 | # 172 | # As of windows 10(?), powershell comes with tar preinstalled, but in practice 173 | # it only seems to support .tar.gz, and not xz/zstd. Still, we should try to 174 | # forward all tars to it in case the user has a machine that can handle it! 
175 | switch -Wildcard ($zip_ext) { 176 | ".zip" { 177 | Expand-Archive -Path $dir_path -DestinationPath "$tmp"; 178 | Break 179 | } 180 | ".tar.*" { 181 | tar xf $dir_path --strip-components 1 -C "$tmp"; 182 | Break 183 | } 184 | Default { 185 | throw "ERROR: unknown archive format $zip_ext" 186 | } 187 | } 188 | 189 | # Let the next step know what to copy 190 | $bin_paths = @() 191 | foreach ($bin_name in $bin_names) { 192 | Write-Verbose " Unpacked $bin_name" 193 | $bin_paths += "$tmp\$bin_name" 194 | } 195 | $lib_paths = @() 196 | foreach ($lib_name in $lib_names) { 197 | Write-Verbose " Unpacked $lib_name" 198 | $lib_paths += "$tmp\$lib_name" 199 | } 200 | $staticlib_paths = @() 201 | foreach ($lib_name in $staticlib_names) { 202 | Write-Verbose " Unpacked $lib_name" 203 | $staticlib_paths += "$tmp\$lib_name" 204 | } 205 | 206 | if ($null -ne $info["updater"]) { 207 | $updater_id = $info["updater"]["artifact_name"] 208 | $updater_url = "$download_url/$updater_id" 209 | $out_name = "$tmp\uv-update.exe" 210 | 211 | $wc.downloadFile($updater_url, $out_name) 212 | $bin_paths += $out_name 213 | } 214 | 215 | return @{ 216 | "bin_paths" = $bin_paths 217 | "lib_paths" = $lib_paths 218 | "staticlib_paths" = $staticlib_paths 219 | } 220 | } 221 | 222 | function Invoke-Installer($artifacts, $platforms) { 223 | # Replaces the placeholder binary entry with the actual list of binaries 224 | $arch = Get-TargetTriple 225 | 226 | if (-not $platforms.ContainsKey($arch)) { 227 | $platforms_json = ConvertTo-Json $platforms 228 | throw "ERROR: could not find binaries for this platform. Last platform tried: $arch platform info: $platforms_json" 229 | } 230 | 231 | $info = $platforms[$arch] 232 | 233 | # Forces the install to occur at this path, not the default 234 | $force_install_dir = $null 235 | # Check the newer app-specific variable before falling back 236 | # to the older generic one 237 | if (($env:UV_INSTALL_DIR)) { 238 | $force_install_dir = $env:UV_INSTALL_DIR 239 | } elseif (($env:CARGO_DIST_FORCE_INSTALL_DIR)) { 240 | $force_install_dir = $env:CARGO_DIST_FORCE_INSTALL_DIR 241 | } 242 | 243 | # The actual path we're going to install to 244 | $dest_dir = $null 245 | $dest_dir_lib = $null 246 | # The install prefix we write to the receipt. 247 | # For organized install methods like CargoHome, which have 248 | # subdirectories, this is the root without `/bin`. For other 249 | # methods, this is the same as `_install_dir`. 250 | $receipt_dest_dir = $null 251 | # Before actually consulting the configured install strategy, see 252 | # if we're overriding it. 
253 | if (($force_install_dir)) { 254 | 255 | $dest_dir = Join-Path $force_install_dir "bin" 256 | $dest_dir_lib = $dest_dir 257 | $receipt_dest_dir = $force_install_dir 258 | } 259 | if (-Not $dest_dir) { 260 | # first try $env:CARGO_HOME, then fallback to $HOME 261 | # (for whatever reason $HOME is not a normal env var and doesn't need the $env: prefix) 262 | $root = if (($base_dir = $env:CARGO_HOME)) { 263 | $base_dir 264 | } elseif (($base_dir = $HOME)) { 265 | Join-Path $base_dir ".cargo" 266 | } else { 267 | throw "ERROR: could not find your HOME dir or CARGO_HOME to install binaries to" 268 | } 269 | 270 | $dest_dir = Join-Path $root "bin" 271 | $dest_dir_lib = $dest_dir 272 | $receipt_dest_dir = $root 273 | } 274 | 275 | # Looks like all of the above assignments failed 276 | if (-Not $dest_dir) { 277 | throw "ERROR: could not find a valid path to install to; please check the installation instructions" 278 | } 279 | 280 | # The replace call here ensures proper escaping is inlined into the receipt 281 | $receipt = $receipt.Replace('AXO_INSTALL_PREFIX', $receipt_dest_dir.replace("\", "\\")) 282 | 283 | $dest_dir = New-Item -Force -ItemType Directory -Path $dest_dir 284 | $dest_dir_lib = New-Item -Force -ItemType Directory -Path $dest_dir_lib 285 | Write-Information "Installing to $dest_dir" 286 | # Just copy the binaries from the temp location to the install dir 287 | foreach ($bin_path in $artifacts["bin_paths"]) { 288 | $installed_file = Split-Path -Path "$bin_path" -Leaf 289 | Copy-Item "$bin_path" -Destination "$dest_dir" -ErrorAction Stop 290 | Remove-Item "$bin_path" -Recurse -Force -ErrorAction Stop 291 | Write-Information " $installed_file" 292 | 293 | if (($dests = $info["aliases"][$installed_file])) { 294 | $source = Join-Path "$dest_dir" "$installed_file" 295 | foreach ($dest_name in $dests) { 296 | $dest = Join-Path $dest_dir $dest_name 297 | $null = New-Item -ItemType HardLink -Target "$source" -Path "$dest" -Force -ErrorAction Stop 298 | } 299 | } 300 | } 301 | foreach ($lib_path in $artifacts["lib_paths"]) { 302 | $installed_file = Split-Path -Path "$lib_path" -Leaf 303 | Copy-Item "$lib_path" -Destination "$dest_dir_lib" -ErrorAction Stop 304 | Remove-Item "$lib_path" -Recurse -Force -ErrorAction Stop 305 | Write-Information " $installed_file" 306 | } 307 | foreach ($lib_path in $artifacts["staticlib_paths"]) { 308 | $installed_file = Split-Path -Path "$lib_path" -Leaf 309 | Copy-Item "$lib_path" -Destination "$dest_dir_lib" -ErrorAction Stop 310 | Remove-Item "$lib_path" -Recurse -Force -ErrorAction Stop 311 | Write-Information " $installed_file" 312 | } 313 | 314 | $formatted_bins = ($info["bins"] | ForEach-Object { '"' + $_ + '"' }) -join "," 315 | $receipt = $receipt.Replace('"CARGO_DIST_BINS"', $formatted_bins) 316 | $formatted_libs = ($info["libs"] | ForEach-Object { '"' + $_ + '"' }) -join "," 317 | $receipt = $receipt.Replace('"CARGO_DIST_DYLIBS"', $formatted_libs) 318 | $formatted_staticlibs = ($info["staticlibs"] | ForEach-Object { '"' + $_ + '"' }) -join "," 319 | $receipt = $receipt.Replace('"CARGO_DIST_STATICLIBS"', $formatted_staticlibs) 320 | # Also replace the aliases with the arch-specific one 321 | $receipt = $receipt.Replace('"binary_aliases":{}', -join('"binary_aliases":', $info['aliases_json'])) 322 | 323 | # Write the install receipt 324 | $null = New-Item -Path $receipt_home -ItemType "directory" -ErrorAction SilentlyContinue 325 | # Trying to get Powershell 5.1 (not 6+, which is fake and lies) to write utf8 is a crime 326 | # because "Out-File 
-Encoding utf8" actually still means utf8BOM, so we need to pull out 327 | # .NET's APIs which actually do what you tell them (also apparently utf8NoBOM is the 328 | # default in newer .NETs but I'd rather not rely on that at this point). 329 | $Utf8NoBomEncoding = New-Object System.Text.UTF8Encoding $False 330 | [IO.File]::WriteAllLines("$receipt_home/uv-receipt.json", "$receipt", $Utf8NoBomEncoding) 331 | 332 | # Respect the environment, but CLI takes precedence 333 | if ($null -eq $NoModifyPath) { 334 | $NoModifyPath = $env:INSTALLER_NO_MODIFY_PATH 335 | } 336 | 337 | Write-Information "everything's installed!" 338 | if (-not $NoModifyPath) { 339 | Add-Ci-Path $dest_dir 340 | if (Add-Path $dest_dir) { 341 | Write-Information "" 342 | Write-Information "To add $dest_dir to your PATH, either restart your system or run:" 343 | Write-Information "" 344 | Write-Information " set Path=$dest_dir;%Path% (cmd)" 345 | Write-Information " `$env:Path = `"$dest_dir;`$env:Path`" (powershell)" 346 | } 347 | } 348 | } 349 | 350 | # Attempt to do CI-specific rituals to get the install-dir on PATH faster 351 | function Add-Ci-Path($OrigPathToAdd) { 352 | # If GITHUB_PATH is present, then write install_dir to the file it refs. 353 | # After each GitHub Action, the contents will be added to PATH. 354 | # So if you put a curl | sh for this script in its own "run" step, 355 | # the next step will have this dir on PATH. 356 | # 357 | # Note that GITHUB_PATH will not resolve any variables, so we in fact 358 | # want to write the install dir and not an expression that evals to it 359 | if (($gh_path = $env:GITHUB_PATH)) { 360 | Write-Output "$OrigPathToAdd" | Out-File -FilePath "$gh_path" -Encoding utf8 -Append 361 | } 362 | } 363 | 364 | # Try to add the given path to PATH via the registry 365 | # 366 | # Returns true if the registry was modified, otherwise returns false 367 | # (indicating it was already on PATH) 368 | function Add-Path($OrigPathToAdd) { 369 | Write-Verbose "Adding $OrigPathToAdd to your PATH" 370 | $RegistryPath = "HKCU:\Environment" 371 | $PropertyName = "Path" 372 | $PathToAdd = $OrigPathToAdd 373 | 374 | $Item = if (Test-Path $RegistryPath) { 375 | # If the registry key exists, get it 376 | Get-Item -Path $RegistryPath 377 | } else { 378 | # If the registry key doesn't exist, create it 379 | Write-Verbose "Creating $RegistryPath" 380 | New-Item -Path $RegistryPath -Force 381 | } 382 | 383 | $OldPath = "" 384 | try { 385 | # Try to get the old PATH value. If that fails, assume we're making it from scratch. 386 | # Otherwise assume there's already paths in here and use a ; separator 387 | $OldPath = $Item | Get-ItemPropertyValue -Name $PropertyName 388 | $PathToAdd = "$PathToAdd;" 389 | } catch { 390 | # We'll be creating the PATH from scratch 391 | Write-Verbose "No $PropertyName Property exists on $RegistryPath (we'll make one)" 392 | } 393 | 394 | # Check if the path is already there 395 | # 396 | # We don't want to incorrectly match "C:\blah\" to "C:\blah\blah\", so we include the semicolon 397 | # delimiters when searching, ensuring exact matches. To avoid corner cases we add semicolons to 398 | # both sides of the input, allowing us to pretend we're always in the middle of a list. 399 | Write-Verbose "Old $PropertyName Property is $OldPath" 400 | if (";$OldPath;" -like "*;$OrigPathToAdd;*") { 401 | # Already on path, nothing to do 402 | Write-Verbose "install dir already on PATH, all done!" 
403 | return $false 404 | } else { 405 | # Actually update PATH 406 | Write-Verbose "Actually mutating $PropertyName Property" 407 | $NewPath = $PathToAdd + $OldPath 408 | # We use -Force here to make the value already existing not be an error 409 | $Item | New-ItemProperty -Name $PropertyName -Value $NewPath -PropertyType String -Force | Out-Null 410 | return $true 411 | } 412 | } 413 | 414 | function Initialize-Environment() { 415 | If (($PSVersionTable.PSVersion.Major) -lt 5) { 416 | throw @" 417 | Error: PowerShell 5 or later is required to install $app_name. 418 | Upgrade PowerShell: 419 | 420 | https://docs.microsoft.com/en-us/powershell/scripting/setup/installing-windows-powershell 421 | 422 | "@ 423 | } 424 | 425 | # show notification to change execution policy: 426 | $allowedExecutionPolicy = @('Unrestricted', 'RemoteSigned', 'ByPass') 427 | If ((Get-ExecutionPolicy).ToString() -notin $allowedExecutionPolicy) { 428 | throw @" 429 | Error: PowerShell requires an execution policy in [$($allowedExecutionPolicy -join ", ")] to run $app_name. For example, to set the execution policy to 'RemoteSigned' please run: 430 | 431 | Set-ExecutionPolicy RemoteSigned -scope CurrentUser 432 | 433 | "@ 434 | } 435 | 436 | # GitHub requires TLS 1.2 437 | If ([System.Enum]::GetNames([System.Net.SecurityProtocolType]) -notcontains 'Tls12') { 438 | throw @" 439 | Error: Installing $app_name requires at least .NET Framework 4.5 440 | Please download and install it first: 441 | 442 | https://www.microsoft.com/net/download 443 | 444 | "@ 445 | } 446 | } 447 | 448 | function New-Temp-Dir() { 449 | [CmdletBinding(SupportsShouldProcess)] 450 | param() 451 | $parent = [System.IO.Path]::GetTempPath() 452 | [string] $name = [System.Guid]::NewGuid() 453 | New-Item -ItemType Directory -Path (Join-Path $parent $name) 454 | } 455 | 456 | # PSScriptAnalyzer doesn't like how we use our params as globals, this calms it 457 | $Null = $ArtifactDownloadUrl, $NoModifyPath, $Help 458 | # Make Write-Information statements be visible 459 | $InformationPreference = "Continue" 460 | 461 | # The default interactive handler 462 | try { 463 | Install-Binary "$Args" 464 | } catch { 465 | Write-Information $_ 466 | exit 1 467 | } 468 | -------------------------------------------------------------------------------- /uv-installer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # shellcheck shell=dash 3 | # 4 | # Licensed under the MIT license 5 | # , at your 6 | # option. This file may not be copied, modified, or distributed 7 | # except according to those terms. 8 | 9 | if [ "$KSH_VERSION" = 'Version JM 93t+ 2010-03-05' ]; then 10 | # The version of ksh93 that ships with many illumos systems does not 11 | # support the "local" extension. Print a message rather than fail in 12 | # subtle ways later on: 13 | echo 'this installer does not work with this ksh93 version; please try bash!' >&2 14 | exit 1 15 | fi 16 | 17 | set -u 18 | 19 | APP_NAME="uv" 20 | APP_VERSION="0.4.21" 21 | ARTIFACT_DOWNLOAD_URL="${INSTALLER_DOWNLOAD_URL:-https://github.com/astral-sh/uv/releases/download/0.4.21}" 22 | PRINT_VERBOSE=${INSTALLER_PRINT_VERBOSE:-0} 23 | PRINT_QUIET=${INSTALLER_PRINT_QUIET:-0} 24 | NO_MODIFY_PATH=${INSTALLER_NO_MODIFY_PATH:-0} 25 | read -r RECEIPT <&2 345 | say_verbose " from $_url" 1>&2 346 | say_verbose " to $_file" 1>&2 347 | 348 | ensure mkdir -p "$_dir" 349 | 350 | if ! 
downloader "$_url" "$_file"; then 351 | say "failed to download $_url" 352 | say "this may be a standard network error, but it may also indicate" 353 | say "that $APP_NAME's release process is not working. When in doubt" 354 | say "please feel free to open an issue!" 355 | exit 1 356 | fi 357 | 358 | # ...and then the updater, if it exists 359 | if [ -n "$_updater_name" ]; then 360 | local _updater_url="$ARTIFACT_DOWNLOAD_URL/$_updater_name" 361 | # This renames the artifact while doing the download, removing the 362 | # target triple and leaving just the appname-update format 363 | local _updater_file="$_dir/$APP_NAME-update" 364 | 365 | if ! downloader "$_updater_url" "$_updater_file"; then 366 | say "failed to download $_updater_url" 367 | say "this may be a standard network error, but it may also indicate" 368 | say "that $APP_NAME's release process is not working. When in doubt" 369 | say "please feel free to open an issue!" 370 | exit 1 371 | fi 372 | 373 | # Add the updater to the list of binaries to install 374 | _bins="$_bins $APP_NAME-update" 375 | fi 376 | 377 | # unpack the archive 378 | case "$_zip_ext" in 379 | ".zip") 380 | ensure unzip -q "$_file" -d "$_dir" 381 | ;; 382 | 383 | ".tar."*) 384 | ensure tar xf "$_file" --strip-components 1 -C "$_dir" 385 | ;; 386 | *) 387 | err "unknown archive format: $_zip_ext" 388 | ;; 389 | esac 390 | 391 | install "$_dir" "$_bins" "$_libs" "$_staticlibs" "$_arch" "$@" 392 | local _retval=$? 393 | if [ "$_retval" != 0 ]; then 394 | return "$_retval" 395 | fi 396 | 397 | ignore rm -rf "$_dir" 398 | 399 | # Install the install receipt 400 | mkdir -p "$RECEIPT_HOME" || { 401 | err "unable to create receipt directory at $RECEIPT_HOME" 402 | } 403 | echo "$RECEIPT" > "$RECEIPT_HOME/$APP_NAME-receipt.json" 404 | # shellcheck disable=SC2320 405 | local _retval=$? 406 | 407 | return "$_retval" 408 | } 409 | 410 | # Replaces $HOME with the variable name for display to the user, 411 | # only if $HOME is defined. 
412 | replace_home() { 413 | local _str="$1" 414 | 415 | if [ -n "${HOME:-}" ]; then 416 | echo "$_str" | sed "s,$HOME,\$HOME," 417 | else 418 | echo "$_str" 419 | fi 420 | } 421 | 422 | json_binary_aliases() { 423 | local _arch="$1" 424 | 425 | case "$_arch" in 426 | "aarch64-apple-darwin") 427 | echo '{}' 428 | ;; 429 | "aarch64-unknown-linux-gnu") 430 | echo '{}' 431 | ;; 432 | "aarch64-unknown-linux-musl-dynamic") 433 | echo '{}' 434 | ;; 435 | "aarch64-unknown-linux-musl-static") 436 | echo '{}' 437 | ;; 438 | "arm-unknown-linux-gnueabihf") 439 | echo '{}' 440 | ;; 441 | "arm-unknown-linux-musl-dynamiceabihf") 442 | echo '{}' 443 | ;; 444 | "arm-unknown-linux-musl-staticeabihf") 445 | echo '{}' 446 | ;; 447 | "armv7-unknown-linux-gnueabihf") 448 | echo '{}' 449 | ;; 450 | "armv7-unknown-linux-musl-dynamiceabihf") 451 | echo '{}' 452 | ;; 453 | "armv7-unknown-linux-musl-staticeabihf") 454 | echo '{}' 455 | ;; 456 | "i686-pc-windows-gnu") 457 | echo '{}' 458 | ;; 459 | "i686-unknown-linux-gnu") 460 | echo '{}' 461 | ;; 462 | "i686-unknown-linux-musl-dynamic") 463 | echo '{}' 464 | ;; 465 | "i686-unknown-linux-musl-static") 466 | echo '{}' 467 | ;; 468 | "powerpc64-unknown-linux-gnu") 469 | echo '{}' 470 | ;; 471 | "powerpc64le-unknown-linux-gnu") 472 | echo '{}' 473 | ;; 474 | "s390x-unknown-linux-gnu") 475 | echo '{}' 476 | ;; 477 | "x86_64-apple-darwin") 478 | echo '{}' 479 | ;; 480 | "x86_64-pc-windows-gnu") 481 | echo '{}' 482 | ;; 483 | "x86_64-unknown-linux-gnu") 484 | echo '{}' 485 | ;; 486 | "x86_64-unknown-linux-musl-dynamic") 487 | echo '{}' 488 | ;; 489 | "x86_64-unknown-linux-musl-static") 490 | echo '{}' 491 | ;; 492 | *) 493 | echo '{}' 494 | ;; 495 | esac 496 | } 497 | 498 | aliases_for_binary() { 499 | local _bin="$1" 500 | local _arch="$2" 501 | 502 | case "$_arch" in 503 | "aarch64-apple-darwin") 504 | case "$_bin" in 505 | *) 506 | echo "" 507 | ;; 508 | esac 509 | ;; 510 | "aarch64-unknown-linux-gnu") 511 | case "$_bin" in 512 | *) 513 | echo "" 514 | ;; 515 | esac 516 | ;; 517 | "aarch64-unknown-linux-musl-dynamic") 518 | case "$_bin" in 519 | *) 520 | echo "" 521 | ;; 522 | esac 523 | ;; 524 | "aarch64-unknown-linux-musl-static") 525 | case "$_bin" in 526 | *) 527 | echo "" 528 | ;; 529 | esac 530 | ;; 531 | "arm-unknown-linux-gnueabihf") 532 | case "$_bin" in 533 | *) 534 | echo "" 535 | ;; 536 | esac 537 | ;; 538 | "arm-unknown-linux-musl-dynamiceabihf") 539 | case "$_bin" in 540 | *) 541 | echo "" 542 | ;; 543 | esac 544 | ;; 545 | "arm-unknown-linux-musl-staticeabihf") 546 | case "$_bin" in 547 | *) 548 | echo "" 549 | ;; 550 | esac 551 | ;; 552 | "armv7-unknown-linux-gnueabihf") 553 | case "$_bin" in 554 | *) 555 | echo "" 556 | ;; 557 | esac 558 | ;; 559 | "armv7-unknown-linux-musl-dynamiceabihf") 560 | case "$_bin" in 561 | *) 562 | echo "" 563 | ;; 564 | esac 565 | ;; 566 | "armv7-unknown-linux-musl-staticeabihf") 567 | case "$_bin" in 568 | *) 569 | echo "" 570 | ;; 571 | esac 572 | ;; 573 | "i686-pc-windows-gnu") 574 | case "$_bin" in 575 | *) 576 | echo "" 577 | ;; 578 | esac 579 | ;; 580 | "i686-unknown-linux-gnu") 581 | case "$_bin" in 582 | *) 583 | echo "" 584 | ;; 585 | esac 586 | ;; 587 | "i686-unknown-linux-musl-dynamic") 588 | case "$_bin" in 589 | *) 590 | echo "" 591 | ;; 592 | esac 593 | ;; 594 | "i686-unknown-linux-musl-static") 595 | case "$_bin" in 596 | *) 597 | echo "" 598 | ;; 599 | esac 600 | ;; 601 | "powerpc64-unknown-linux-gnu") 602 | case "$_bin" in 603 | *) 604 | echo "" 605 | ;; 606 | esac 607 | ;; 608 | 
"powerpc64le-unknown-linux-gnu") 609 | case "$_bin" in 610 | *) 611 | echo "" 612 | ;; 613 | esac 614 | ;; 615 | "s390x-unknown-linux-gnu") 616 | case "$_bin" in 617 | *) 618 | echo "" 619 | ;; 620 | esac 621 | ;; 622 | "x86_64-apple-darwin") 623 | case "$_bin" in 624 | *) 625 | echo "" 626 | ;; 627 | esac 628 | ;; 629 | "x86_64-pc-windows-gnu") 630 | case "$_bin" in 631 | *) 632 | echo "" 633 | ;; 634 | esac 635 | ;; 636 | "x86_64-unknown-linux-gnu") 637 | case "$_bin" in 638 | *) 639 | echo "" 640 | ;; 641 | esac 642 | ;; 643 | "x86_64-unknown-linux-musl-dynamic") 644 | case "$_bin" in 645 | *) 646 | echo "" 647 | ;; 648 | esac 649 | ;; 650 | "x86_64-unknown-linux-musl-static") 651 | case "$_bin" in 652 | *) 653 | echo "" 654 | ;; 655 | esac 656 | ;; 657 | *) 658 | echo "" 659 | ;; 660 | esac 661 | } 662 | 663 | select_archive_for_arch() { 664 | local _true_arch="$1" 665 | local _archive 666 | case "$_true_arch" in 667 | "aarch64-apple-darwin") 668 | _archive="uv-aarch64-apple-darwin.tar.gz" 669 | if [ -n "$_archive" ]; then 670 | echo "$_archive" 671 | return 0 672 | fi 673 | _archive="uv-x86_64-apple-darwin.tar.gz" 674 | if [ -n "$_archive" ]; then 675 | echo "$_archive" 676 | return 0 677 | fi 678 | ;; 679 | "aarch64-pc-windows-msvc") 680 | _archive="uv-x86_64-pc-windows-msvc.zip" 681 | if [ -n "$_archive" ]; then 682 | echo "$_archive" 683 | return 0 684 | fi 685 | _archive="uv-i686-pc-windows-msvc.zip" 686 | if [ -n "$_archive" ]; then 687 | echo "$_archive" 688 | return 0 689 | fi 690 | ;; 691 | "aarch64-unknown-linux-gnu") 692 | _archive="uv-aarch64-unknown-linux-gnu.tar.gz" 693 | if ! check_glibc "2" "31"; then 694 | _archive="" 695 | fi 696 | if [ -n "$_archive" ]; then 697 | echo "$_archive" 698 | return 0 699 | fi 700 | _archive="uv-aarch64-unknown-linux-musl.tar.gz" 701 | if [ -n "$_archive" ]; then 702 | echo "$_archive" 703 | return 0 704 | fi 705 | ;; 706 | "aarch64-unknown-linux-musl-dynamic") 707 | _archive="uv-aarch64-unknown-linux-musl.tar.gz" 708 | if [ -n "$_archive" ]; then 709 | echo "$_archive" 710 | return 0 711 | fi 712 | ;; 713 | "aarch64-unknown-linux-musl-static") 714 | _archive="uv-aarch64-unknown-linux-musl.tar.gz" 715 | if [ -n "$_archive" ]; then 716 | echo "$_archive" 717 | return 0 718 | fi 719 | ;; 720 | "arm-unknown-linux-gnueabihf") 721 | _archive="uv-arm-unknown-linux-musleabihf.tar.gz" 722 | if [ -n "$_archive" ]; then 723 | echo "$_archive" 724 | return 0 725 | fi 726 | ;; 727 | "arm-unknown-linux-musl-dynamiceabihf") 728 | _archive="uv-arm-unknown-linux-musleabihf.tar.gz" 729 | if [ -n "$_archive" ]; then 730 | echo "$_archive" 731 | return 0 732 | fi 733 | ;; 734 | "arm-unknown-linux-musl-staticeabihf") 735 | _archive="uv-arm-unknown-linux-musleabihf.tar.gz" 736 | if [ -n "$_archive" ]; then 737 | echo "$_archive" 738 | return 0 739 | fi 740 | ;; 741 | "armv7-unknown-linux-gnueabihf") 742 | _archive="uv-armv7-unknown-linux-gnueabihf.tar.gz" 743 | if ! 
check_glibc "2" "31"; then 744 | _archive="" 745 | fi 746 | if [ -n "$_archive" ]; then 747 | echo "$_archive" 748 | return 0 749 | fi 750 | _archive="uv-armv7-unknown-linux-musleabihf.tar.gz" 751 | if [ -n "$_archive" ]; then 752 | echo "$_archive" 753 | return 0 754 | fi 755 | ;; 756 | "armv7-unknown-linux-musl-dynamiceabihf") 757 | _archive="uv-armv7-unknown-linux-musleabihf.tar.gz" 758 | if [ -n "$_archive" ]; then 759 | echo "$_archive" 760 | return 0 761 | fi 762 | ;; 763 | "armv7-unknown-linux-musl-staticeabihf") 764 | _archive="uv-armv7-unknown-linux-musleabihf.tar.gz" 765 | if [ -n "$_archive" ]; then 766 | echo "$_archive" 767 | return 0 768 | fi 769 | ;; 770 | "i686-pc-windows-gnu") 771 | _archive="uv-i686-pc-windows-msvc.zip" 772 | if [ -n "$_archive" ]; then 773 | echo "$_archive" 774 | return 0 775 | fi 776 | ;; 777 | "i686-pc-windows-msvc") 778 | _archive="uv-i686-pc-windows-msvc.zip" 779 | if [ -n "$_archive" ]; then 780 | echo "$_archive" 781 | return 0 782 | fi 783 | ;; 784 | "i686-unknown-linux-gnu") 785 | _archive="uv-i686-unknown-linux-gnu.tar.gz" 786 | if ! check_glibc "2" "31"; then 787 | _archive="" 788 | fi 789 | if [ -n "$_archive" ]; then 790 | echo "$_archive" 791 | return 0 792 | fi 793 | _archive="uv-i686-unknown-linux-musl.tar.gz" 794 | if [ -n "$_archive" ]; then 795 | echo "$_archive" 796 | return 0 797 | fi 798 | ;; 799 | "i686-unknown-linux-musl-dynamic") 800 | _archive="uv-i686-unknown-linux-musl.tar.gz" 801 | if [ -n "$_archive" ]; then 802 | echo "$_archive" 803 | return 0 804 | fi 805 | ;; 806 | "i686-unknown-linux-musl-static") 807 | _archive="uv-i686-unknown-linux-musl.tar.gz" 808 | if [ -n "$_archive" ]; then 809 | echo "$_archive" 810 | return 0 811 | fi 812 | ;; 813 | "powerpc64-unknown-linux-gnu") 814 | _archive="uv-powerpc64-unknown-linux-gnu.tar.gz" 815 | if ! check_glibc "2" "31"; then 816 | _archive="" 817 | fi 818 | if [ -n "$_archive" ]; then 819 | echo "$_archive" 820 | return 0 821 | fi 822 | ;; 823 | "powerpc64le-unknown-linux-gnu") 824 | _archive="uv-powerpc64le-unknown-linux-gnu.tar.gz" 825 | if ! check_glibc "2" "31"; then 826 | _archive="" 827 | fi 828 | if [ -n "$_archive" ]; then 829 | echo "$_archive" 830 | return 0 831 | fi 832 | ;; 833 | "s390x-unknown-linux-gnu") 834 | _archive="uv-s390x-unknown-linux-gnu.tar.gz" 835 | if ! check_glibc "2" "31"; then 836 | _archive="" 837 | fi 838 | if [ -n "$_archive" ]; then 839 | echo "$_archive" 840 | return 0 841 | fi 842 | ;; 843 | "x86_64-apple-darwin") 844 | _archive="uv-x86_64-apple-darwin.tar.gz" 845 | if [ -n "$_archive" ]; then 846 | echo "$_archive" 847 | return 0 848 | fi 849 | ;; 850 | "x86_64-pc-windows-gnu") 851 | _archive="uv-x86_64-pc-windows-msvc.zip" 852 | if [ -n "$_archive" ]; then 853 | echo "$_archive" 854 | return 0 855 | fi 856 | ;; 857 | "x86_64-pc-windows-msvc") 858 | _archive="uv-x86_64-pc-windows-msvc.zip" 859 | if [ -n "$_archive" ]; then 860 | echo "$_archive" 861 | return 0 862 | fi 863 | _archive="uv-i686-pc-windows-msvc.zip" 864 | if [ -n "$_archive" ]; then 865 | echo "$_archive" 866 | return 0 867 | fi 868 | ;; 869 | "x86_64-unknown-linux-gnu") 870 | _archive="uv-x86_64-unknown-linux-gnu.tar.gz" 871 | if ! 
check_glibc "2" "31"; then 872 | _archive="" 873 | fi 874 | if [ -n "$_archive" ]; then 875 | echo "$_archive" 876 | return 0 877 | fi 878 | _archive="uv-x86_64-unknown-linux-musl.tar.gz" 879 | if [ -n "$_archive" ]; then 880 | echo "$_archive" 881 | return 0 882 | fi 883 | ;; 884 | "x86_64-unknown-linux-musl-dynamic") 885 | _archive="uv-x86_64-unknown-linux-musl.tar.gz" 886 | if [ -n "$_archive" ]; then 887 | echo "$_archive" 888 | return 0 889 | fi 890 | ;; 891 | "x86_64-unknown-linux-musl-static") 892 | _archive="uv-x86_64-unknown-linux-musl.tar.gz" 893 | if [ -n "$_archive" ]; then 894 | echo "$_archive" 895 | return 0 896 | fi 897 | ;; 898 | *) 899 | err "there isn't a download for your platform $_true_arch" 900 | ;; 901 | esac 902 | err "no compatible downloads were found for your platform $_true_arch" 903 | } 904 | 905 | check_glibc() { 906 | local _min_glibc_major="$1" 907 | local _min_glibc_series="$2" 908 | 909 | # Parsing version out from line 1 like: 910 | # ldd (Ubuntu GLIBC 2.35-0ubuntu3.1) 2.35 911 | _local_glibc="$(ldd --version | awk -F' ' '{ if (FNR<=1) print $NF }')" 912 | 913 | if [ "$(echo "${_local_glibc}" | awk -F. '{ print $1 }')" = "$_min_glibc_major" ] && [ "$(echo "${_local_glibc}" | awk -F. '{ print $2 }')" -ge "$_min_glibc_series" ]; then 914 | return 0 915 | else 916 | say "System glibc version (\`${_local_glibc}') is too old; checking alternatives" >&2 917 | return 1 918 | fi 919 | } 920 | 921 | # See discussion of late-bound vs early-bound for why we use single-quotes with env vars 922 | # shellcheck disable=SC2016 923 | install() { 924 | # This code needs to both compute certain paths for itself to write to, and 925 | # also write them to shell/rc files so that they can look them up to e.g. 926 | # add them to PATH. This requires an active distinction between paths 927 | # and expressions that can compute them. 928 | # 929 | # The distinction lies in when we want env-vars to be evaluated. For instance 930 | # if we determine that we want to install to $HOME/.myapp, which do we add 931 | # to e.g. $HOME/.profile: 932 | # 933 | # * early-bound: export PATH="/home/myuser/.myapp:$PATH" 934 | # * late-bound: export PATH="$HOME/.myapp:$PATH" 935 | # 936 | # In this case most people would prefer the late-bound version, but in other 937 | # cases the early-bound version might be a better idea. In particular when using 938 | # other env-vars than $HOME, they are more likely to be only set temporarily 939 | # for the duration of this install script, so it's more advisable to erase their 940 | # existence with early-bounding. 941 | # 942 | # This distinction is handled by "double-quotes" (early) vs 'single-quotes' (late). 943 | # 944 | # However if we detect that "$SOME_VAR/..." is a subdir of $HOME, we try to rewrite 945 | # it to be '$HOME/...' to get the best of both worlds. 946 | # 947 | # This script has a few different variants, the most complex one being the 948 | # CARGO_HOME version which attempts to install things to Cargo's bin dir, 949 | # potentially setting up a minimal version if the user hasn't ever installed Cargo. 
950 |     #
951 |     # In this case we need to:
952 |     #
953 |     # * Install to $HOME/.cargo/bin/
954 |     # * Create a shell script at $HOME/.cargo/env that:
955 |     #   * Checks if $HOME/.cargo/bin/ is on PATH
956 |     #   * and if not prepends it to PATH
957 |     # * Edits $HOME/.profile to run $HOME/.cargo/env (if the line doesn't exist)
958 |     #
959 |     # To do this we need these 4 values:
960 |
961 |     # The actual path we're going to install to
962 |     local _install_dir
963 |     # The directory C dynamic/static libraries are installed to
964 |     local _lib_install_dir
965 |     # The install prefix we write to the receipt.
966 |     # For organized install methods like CargoHome, which have
967 |     # subdirectories, this is the root without `/bin`. For other
968 |     # methods, this is the same as `_install_dir`.
969 |     local _receipt_install_dir
970 |     # Path to the shell script that adds install_dir to PATH
971 |     local _env_script_path
972 |     # Potentially-late-bound version of install_dir to write into env_script
973 |     local _install_dir_expr
974 |     # Potentially-late-bound version of env_script_path to write into rcfiles like $HOME/.profile
975 |     local _env_script_path_expr
976 |     # Forces the install to occur at this path, not the default
977 |     local _force_install_dir
978 |
979 |     # Check the newer app-specific variable before falling back
980 |     # to the older generic one
981 |     if [ -n "${UV_INSTALL_DIR:-}" ]; then
982 |         _force_install_dir="$UV_INSTALL_DIR"
983 |     elif [ -n "${CARGO_DIST_FORCE_INSTALL_DIR:-}" ]; then
984 |         _force_install_dir="$CARGO_DIST_FORCE_INSTALL_DIR"
985 |     fi
986 |
987 |     # Before actually consulting the configured install strategy, see
988 |     # if we're overriding it.
989 |     if [ -n "${_force_install_dir:-}" ]; then
990 |         _install_dir="$_force_install_dir/bin"
991 |         _lib_install_dir="$_force_install_dir/bin"
992 |         _receipt_install_dir="$_force_install_dir"
993 |         _env_script_path="$_force_install_dir/env"
994 |         _install_dir_expr="$(replace_home "$_force_install_dir/bin")"
995 |         _env_script_path_expr="$(replace_home "$_force_install_dir/env")"
996 |     fi
997 |     if [ -z "${_install_dir:-}" ]; then
998 |         # first try $CARGO_HOME, then fall back to $HOME/.cargo
999 |         if [ -n "${CARGO_HOME:-}" ]; then
1000 |             _receipt_install_dir="$CARGO_HOME"
1001 |             _install_dir="$CARGO_HOME/bin"
1002 |             _lib_install_dir="$CARGO_HOME/bin"
1003 |             _env_script_path="$CARGO_HOME/env"
1004 |             # Initially make this early-bound to erase the potentially-temporary env-var
1005 |             _install_dir_expr="$_install_dir"
1006 |             _env_script_path_expr="$_env_script_path"
1007 |             # If CARGO_HOME was set but it ended up being the default $HOME-based path,
1008 |             # then keep things late-bound. Otherwise bake the value for safety.
1009 |             # This is what rustup does, and accurately reproducing it is useful.
1010 |             if [ -n "${HOME:-}" ]; then
1011 |                 if [ "$HOME/.cargo/bin" = "$_install_dir" ]; then
1012 |                     _install_dir_expr='$HOME/.cargo/bin'
1013 |                     _env_script_path_expr='$HOME/.cargo/env'
1014 |                 fi
1015 |             fi
1016 |         elif [ -n "${HOME:-}" ]; then
1017 |             _receipt_install_dir="$HOME/.cargo"
1018 |             _install_dir="$HOME/.cargo/bin"
1019 |             _lib_install_dir="$HOME/.cargo/bin"
1020 |             _env_script_path="$HOME/.cargo/env"
1021 |             _install_dir_expr='$HOME/.cargo/bin'
1022 |             _env_script_path_expr='$HOME/.cargo/env'
1023 |         fi
1024 |     fi
1025 |
1026 |     if [ -z "$_install_dir_expr" ]; then
1027 |         err "could not find a valid path to install to!"
1028 |     fi
1029 |
1030 |     # Identical to the sh version, just with a .fish file extension
1031 |     # We place it down here to wait until it's been assigned on every
1032 |     # code path.
1033 |     _fish_env_script_path="${_env_script_path}.fish"
1034 |     _fish_env_script_path_expr="${_env_script_path_expr}.fish"
1035 |
1036 |     # Replace the temporary cargo home with the calculated one
1037 |     RECEIPT=$(echo "$RECEIPT" | sed "s,AXO_INSTALL_PREFIX,$_receipt_install_dir,")
1038 |     # Also replace the aliases with the arch-specific one
1039 |     RECEIPT=$(echo "$RECEIPT" | sed "s'\"binary_aliases\":{}'\"binary_aliases\":$(json_binary_aliases "$_arch")'")
1040 |
1041 |     say "installing to $_install_dir"
1042 |     ensure mkdir -p "$_install_dir"
1043 |     ensure mkdir -p "$_lib_install_dir"
1044 |
1045 |     # copy all the binaries to the install dir
1046 |     local _src_dir="$1"
1047 |     local _bins="$2"
1048 |     local _libs="$3"
1049 |     local _staticlibs="$4"
1050 |     local _arch="$5"
1051 |     for _bin_name in $_bins; do
1052 |         local _bin="$_src_dir/$_bin_name"
1053 |         ensure mv "$_bin" "$_install_dir"
1054 |         # unzip seems to need this chmod
1055 |         ensure chmod +x "$_install_dir/$_bin_name"
1056 |         for _dest in $(aliases_for_binary "$_bin_name" "$_arch"); do
1057 |             ln -sf "$_install_dir/$_bin_name" "$_install_dir/$_dest"
1058 |         done
1059 |         say " $_bin_name"
1060 |     done
1061 |     # Like the above, but no aliases
1062 |     for _lib_name in $_libs; do
1063 |         local _lib="$_src_dir/$_lib_name"
1064 |         ensure mv "$_lib" "$_lib_install_dir"
1065 |         # unzip seems to need this chmod
1066 |         ensure chmod +x "$_lib_install_dir/$_lib_name"
1067 |         say " $_lib_name"
1068 |     done
1069 |     for _lib_name in $_staticlibs; do
1070 |         local _lib="$_src_dir/$_lib_name"
1071 |         ensure mv "$_lib" "$_lib_install_dir"
1072 |         # unzip seems to need this chmod
1073 |         ensure chmod +x "$_lib_install_dir/$_lib_name"
1074 |         say " $_lib_name"
1075 |     done
1076 |
1077 |     say "everything's installed!"
1078 |
1079 |     # Avoid modifying the user's PATH if they are managing their PATH manually
1080 |     case :$PATH:
1081 |         in *:$_install_dir:*) NO_MODIFY_PATH=1 ;;
1082 |         *) ;;
1083 |     esac
1084 |
1085 |     if [ "0" = "$NO_MODIFY_PATH" ]; then
1086 |         add_install_dir_to_ci_path "$_install_dir"
1087 |         add_install_dir_to_path "$_install_dir_expr" "$_env_script_path" "$_env_script_path_expr" ".profile" "sh"
1088 |         exit1=$?
1089 |         shotgun_install_dir_to_path "$_install_dir_expr" "$_env_script_path" "$_env_script_path_expr" ".profile .bashrc .bash_profile .bash_login" "sh"
1090 |         exit2=$?
1091 |         add_install_dir_to_path "$_install_dir_expr" "$_env_script_path" "$_env_script_path_expr" ".zshrc .zshenv" "sh"
1092 |         exit3=$?
1093 |         # This path may not exist by default
1094 |         ensure mkdir -p "$HOME/.config/fish/conf.d"
1095 |         exit4=$?
1096 |         add_install_dir_to_path "$_install_dir_expr" "$_fish_env_script_path" "$_fish_env_script_path_expr" ".config/fish/conf.d/$APP_NAME.env.fish" "fish"
1097 |         exit5=$?
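        # Note: add_install_dir_to_path returns 1 when it actually appended a new
        # "source" line to an rcfile (see its final branch below), so any exit
        # code of 1 here means the user's current shell hasn't picked up the
        # change yet and we should tell them how to apply it.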
1098 |
1099 |         if [ "${exit1:-0}" = 1 ] || [ "${exit2:-0}" = 1 ] || [ "${exit3:-0}" = 1 ] || [ "${exit4:-0}" = 1 ] || [ "${exit5:-0}" = 1 ]; then
1100 |             say ""
1101 |             say "To add $_install_dir_expr to your PATH, either restart your shell or run:"
1102 |             say ""
1103 |             say " source $_env_script_path_expr (sh, bash, zsh)"
1104 |             say " source $_fish_env_script_path_expr (fish)"
1105 |         fi
1106 |     fi
1107 | }
1108 |
1109 | print_home_for_script() {
1110 |     local script="$1"
1111 |
1112 |     local _home
1113 |     case "$script" in
1114 |         # zsh has a special ZDOTDIR directory, which if set
1115 |         # should be considered instead of $HOME
1116 |         .zsh*)
1117 |             if [ -n "${ZDOTDIR:-}" ]; then
1118 |                 _home="$ZDOTDIR"
1119 |             else
1120 |                 _home="$HOME"
1121 |             fi
1122 |             ;;
1123 |         *)
1124 |             _home="$HOME"
1125 |             ;;
1126 |     esac
1127 |
1128 |     echo "$_home"
1129 | }
1130 |
1131 | add_install_dir_to_ci_path() {
1132 |     # Attempt to do CI-specific rituals to get the install-dir on PATH faster
1133 |     local _install_dir="$1"
1134 |
1135 |     # If GITHUB_PATH is present, then write install_dir to the file it refs.
1136 |     # After each GitHub Actions step, the contents will be added to PATH.
1137 |     # So if you put a curl | sh for this script in its own "run" step,
1138 |     # the next step will have this dir on PATH.
1139 |     #
1140 |     # Note that GITHUB_PATH will not resolve any variables, so we in fact
1141 |     # want to write install_dir and not install_dir_expr
1142 |     if [ -n "${GITHUB_PATH:-}" ]; then
1143 |         ensure echo "$_install_dir" >> "$GITHUB_PATH"
1144 |     fi
1145 | }
1146 |
1147 | add_install_dir_to_path() {
1148 |     # Edit rcfiles ($HOME/.profile) to add install_dir to $PATH
1149 |     #
1150 |     # We do this slightly indirectly by creating an "env" shell script which checks if install_dir
1151 |     # is on $PATH already, and prepends it if not. The actual line we then add to rcfiles
1152 |     # is to just source that script. This allows us to blast it into lots of different rcfiles and
1153 |     # have it run multiple times without causing problems. It's also specifically compatible
1154 |     # with the system rustup uses, so that we don't conflict with it.
1155 |     local _install_dir_expr="$1"
1156 |     local _env_script_path="$2"
1157 |     local _env_script_path_expr="$3"
1158 |     local _rcfiles="$4"
1159 |     local _shell="$5"
1160 |
1161 |     if [ -n "${HOME:-}" ]; then
1162 |         local _target
1163 |         local _home
1164 |
1165 |         # Find the first file in the list that exists and choose
1166 |         # that as our target to write to
1167 |         for _rcfile_relative in $_rcfiles; do
1168 |             _home="$(print_home_for_script "$_rcfile_relative")"
1169 |             local _rcfile="$_home/$_rcfile_relative"
1170 |
1171 |             if [ -f "$_rcfile" ]; then
1172 |                 _target="$_rcfile"
1173 |                 break
1174 |             fi
1175 |         done
1176 |
1177 |         # If we didn't find anything, pick the first entry in the
1178 |         # list as the default to create and write to
1179 |         if [ -z "${_target:-}" ]; then
1180 |             local _rcfile_relative
1181 |             _rcfile_relative="$(echo "$_rcfiles" | awk '{ print $1 }')"
1182 |             _home="$(print_home_for_script "$_rcfile_relative")"
1183 |             _target="$_home/$_rcfile_relative"
1184 |         fi
1185 |
1186 |         # `source x` is an alias for `. x`, and the latter is more portable/actually-POSIX.
1187 |         # This apparently comes up a lot on FreeBSD. It's easy enough to always add
1188 |         # the more robust line to rcfiles, but when telling the user to apply the change
1189 |         # to their current shell, ". x" is pretty easy to misread/miscopy, so we use the
1190 |         # prettier "source x" line there. Hopefully people with Weird Shells are aware
1191 |         # this is a thing and know to tweak it (or just restart their shell).
1192 |         local _robust_line=". \"$_env_script_path_expr\""
1193 |         local _pretty_line="source \"$_env_script_path_expr\""
1194 |
1195 |         # Add the env script if it doesn't already exist
1196 |         if [ ! -f "$_env_script_path" ]; then
1197 |             say_verbose "creating $_env_script_path"
1198 |             if [ "$_shell" = "sh" ]; then
1199 |                 write_env_script_sh "$_install_dir_expr" "$_env_script_path"
1200 |             else
1201 |                 write_env_script_fish "$_install_dir_expr" "$_env_script_path"
1202 |             fi
1203 |         else
1204 |             say_verbose "$_env_script_path already exists"
1205 |         fi
1206 |
1207 |         # Check if the line is already in the rcfile
1208 |         # grep returns 0 if matched, 1 if no match, and 2 if an error occurred
1209 |         #
1210 |         # Ideally we could use quiet grep (-q), but that makes "match" and "error"
1211 |         # have the same behaviour, when we want "no match" and "error" to be the same
1212 |         # (on error we want to create the file, which >> conveniently does)
1213 |         #
1214 |         # We search for both kinds of line here just to do the right thing in more cases.
1215 |         if ! grep -F "$_robust_line" "$_target" > /dev/null 2>/dev/null && \
1216 |            ! grep -F "$_pretty_line" "$_target" > /dev/null 2>/dev/null
1217 |         then
1218 |             # If the script now exists, add the line to source it to the rcfile
1219 |             # (This will also create the rcfile if it doesn't exist)
1220 |             if [ -f "$_env_script_path" ]; then
1221 |                 local _line
1222 |                 # Fish has deprecated `.` as an alias for `source` and
1223 |                 # it will be removed in a later version.
1224 |                 # https://fishshell.com/docs/current/cmds/source.html
1225 |                 # By contrast, `.` is the traditional syntax in sh and
1226 |                 # `source` isn't always supported in all circumstances.
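                # (dash, for example, which provides /bin/sh on Debian and
                # Ubuntu, only understands the `.` form.)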
1227 |                 if [ "$_shell" = "fish" ]; then
1228 |                     _line="$_pretty_line"
1229 |                 else
1230 |                     _line="$_robust_line"
1231 |                 fi
1232 |                 say_verbose "adding $_line to $_target"
1233 |                 # prepend an extra newline in case the user's file is missing a trailing one
1234 |                 ensure echo "" >> "$_target"
1235 |                 ensure echo "$_line" >> "$_target"
1236 |                 return 1
1237 |             fi
1238 |         else
1239 |             say_verbose "$_install_dir already on PATH"
1240 |         fi
1241 |     fi
1242 | }
1243 |
1244 | shotgun_install_dir_to_path() {
1245 |     # Edit rcfiles ($HOME/.profile) to add install_dir to $PATH
1246 |     # (Shotgun edition - write to all provided files that exist rather than just the first)
1247 |     local _install_dir_expr="$1"
1248 |     local _env_script_path="$2"
1249 |     local _env_script_path_expr="$3"
1250 |     local _rcfiles="$4"
1251 |     local _shell="$5"
1252 |
1253 |     if [ -n "${HOME:-}" ]; then
1254 |         local _found=false
1255 |         local _home
1256 |
1257 |         for _rcfile_relative in $_rcfiles; do
1258 |             _home="$(print_home_for_script "$_rcfile_relative")"
1259 |             local _rcfile_abs="$_home/$_rcfile_relative"
1260 |
1261 |             if [ -f "$_rcfile_abs" ]; then
1262 |                 _found=true
1263 |                 add_install_dir_to_path "$_install_dir_expr" "$_env_script_path" "$_env_script_path_expr" "$_rcfile_relative" "$_shell"
1264 |             fi
1265 |         done
1266 |
1267 |         # Fall through to the previous "create + write to first file in list" behavior
1268 |         if [ "$_found" = false ]; then
1269 |             add_install_dir_to_path "$_install_dir_expr" "$_env_script_path" "$_env_script_path_expr" "$_rcfiles" "$_shell"
1270 |         fi
1271 |     fi
1272 | }
1273 |
1274 | write_env_script_sh() {
1275 |     # write this env script to the given path (this cat/EOF stuff is a "heredoc" string)
1276 |     local _install_dir_expr="$1"
1277 |     local _env_script_path="$2"
1278 |     ensure cat <<EOF > "$_env_script_path"
1279 | #!/bin/sh
1280 | # add binaries to PATH if they aren't added yet
1281 | # affix colons on either side of \$PATH to simplify matching
1282 | case ":\${PATH}:" in
1283 |     *:"$_install_dir_expr":*)
1284 |         ;;
1285 |     *)
1286 |         # Prepending path in case a system-installed binary needs to be overridden
1287 |         export PATH="$_install_dir_expr:\$PATH"
1288 |         ;;
1289 | esac
1290 | EOF
1291 | }
1292 |
1293 | write_env_script_fish() {
1294 |     # write this env script to the given path (this cat/EOF stuff is a "heredoc" string)
1295 |     local _install_dir_expr="$1"
1296 |     local _env_script_path="$2"
1297 |     ensure cat <<EOF > "$_env_script_path"
1298 | if not contains "$_install_dir_expr" \$PATH
1299 |     # Prepending path in case a system-installed binary needs to be overridden
1300 |     set -x PATH "$_install_dir_expr" \$PATH
1301 | end
1302 | EOF
1303 | }
1304 |
1305 | check_proc() {
1306 |     # Check for /proc by looking for the /proc/self/exe link
1307 |     # This is only run on Linux
1308 |     if ! test -L /proc/self/exe ; then
1309 |         err "fatal: Unable to find /proc/self/exe. Is /proc mounted? Installation cannot proceed without /proc."
1310 |     fi
1311 | }
1312 |
1313 | get_bitness() {
1314 |     need_cmd head
1315 |     # Architecture detection without dependencies beyond coreutils.
1316 |     # ELF files start out "\x7fELF", and the following byte is
1317 |     # 0x01 for 32-bit and
1318 |     # 0x02 for 64-bit.
1319 |     # The printf builtin on some shells like dash only supports octal
1320 |     # escape sequences, so we use those.
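    # For example, printf '\177ELF\001' expands to the bytes 7f 45 4c 46 01,
    # i.e. the ELF magic "\x7fELF" followed by the 32-bit class byte.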
1321 |     local _current_exe_head
1322 |     _current_exe_head=$(head -c 5 /proc/self/exe)
1323 |     if [ "$_current_exe_head" = "$(printf '\177ELF\001')" ]; then
1324 |         echo 32
1325 |     elif [ "$_current_exe_head" = "$(printf '\177ELF\002')" ]; then
1326 |         echo 64
1327 |     else
1328 |         err "unknown platform bitness"
1329 |     fi
1330 | }
1331 |
1332 | is_host_amd64_elf() {
1333 |     need_cmd head
1334 |     need_cmd tail
1335 |     # ELF e_machine detection without dependencies beyond coreutils.
1336 |     # The two-byte field at offset 0x12 identifies the CPU;
1337 |     # we only care whether it is 0x3E, which indicates amd64.
1338 |     local _current_exe_machine
1339 |     _current_exe_machine=$(head -c 19 /proc/self/exe | tail -c 1)
1340 |     [ "$_current_exe_machine" = "$(printf '\076')" ]
1341 | }
1342 |
1343 | get_endianness() {
1344 |     local cputype=$1
1345 |     local suffix_eb=$2
1346 |     local suffix_el=$3
1347 |
1348 |     # detect endianness without od/hexdump, like get_bitness() does.
1349 |     need_cmd head
1350 |     need_cmd tail
1351 |
1352 |     local _current_exe_endianness
1353 |     _current_exe_endianness="$(head -c 6 /proc/self/exe | tail -c 1)"
1354 |     if [ "$_current_exe_endianness" = "$(printf '\001')" ]; then
1355 |         echo "${cputype}${suffix_el}"
1356 |     elif [ "$_current_exe_endianness" = "$(printf '\002')" ]; then
1357 |         echo "${cputype}${suffix_eb}"
1358 |     else
1359 |         err "unknown platform endianness"
1360 |     fi
1361 | }
1362 |
1363 | get_architecture() {
1364 |     local _ostype
1365 |     local _cputype
1366 |     _ostype="$(uname -s)"
1367 |     _cputype="$(uname -m)"
1368 |     local _clibtype="gnu"
1369 |     local _local_glibc
1370 |
1371 |     if [ "$_ostype" = Linux ]; then
1372 |         if [ "$(uname -o)" = Android ]; then
1373 |             _ostype=Android
1374 |         fi
1375 |         if ldd --version 2>&1 | grep -q 'musl'; then
1376 |             _clibtype="musl-dynamic"
1377 |         else
1378 |             # Assume all other Linuxes are glibc (even if wrong, the static libc fallback will apply)
1379 |             _clibtype="gnu"
1380 |         fi
1381 |     fi
1382 |
1383 |     if [ "$_ostype" = Darwin ] && [ "$_cputype" = i386 ]; then
1384 |         # Darwin `uname -m` lies
1385 |         if sysctl hw.optional.x86_64 | grep -q ': 1'; then
1386 |             _cputype=x86_64
1387 |         fi
1388 |     fi
1389 |
1390 |     if [ "$_ostype" = Darwin ] && [ "$_cputype" = x86_64 ]; then
1391 |         # Rosetta on aarch64
1392 |         if [ "$(sysctl -n hw.optional.arm64 2>/dev/null)" = "1" ]; then
1393 |             _cputype=aarch64
1394 |         fi
1395 |     fi
1396 |
1397 |     if [ "$_ostype" = SunOS ]; then
1398 |         # Both Solaris and illumos presently announce as "SunOS" in "uname -s",
1399 |         # so use "uname -o" to disambiguate. We use the full path to the
1400 |         # system uname in case the user has coreutils uname first in PATH,
1401 |         # which has historically sometimes printed the wrong value here.
1402 |         if [ "$(/usr/bin/uname -o)" = illumos ]; then
1403 |             _ostype=illumos
1404 |         fi
1405 |
1406 |         # illumos systems have multi-arch userlands, and "uname -m" reports the
1407 |         # machine hardware name; e.g., "i86pc" on both 32- and 64-bit x86
1408 |         # systems. Check for the native (widest) instruction set on the
1409 |         # running kernel:
1410 |         if [ "$_cputype" = i86pc ]; then
1411 |             _cputype="$(isainfo -n)"
1412 |         fi
1413 |     fi
1414 |
1415 |     case "$_ostype" in
1416 |
1417 |         Android)
1418 |             _ostype=linux-android
1419 |             ;;
1420 |
1421 |         Linux)
1422 |             check_proc
1423 |             _ostype=unknown-linux-$_clibtype
1424 |             _bitness=$(get_bitness)
1425 |             ;;
1426 |
1427 |         FreeBSD)
1428 |             _ostype=unknown-freebsd
1429 |             ;;
1430 |
1431 |         NetBSD)
1432 |             _ostype=unknown-netbsd
1433 |             ;;
1434 |
1435 |         DragonFly)
1436 |             _ostype=unknown-dragonfly
1437 |             ;;
1438 |
1439 |         Darwin)
1440 |             _ostype=apple-darwin
1441 |             ;;
1442 |
1443 |         illumos)
1444 |             _ostype=unknown-illumos
1445 |             ;;
1446 |
1447 |         MINGW* | MSYS* | CYGWIN* | Windows_NT)
1448 |             _ostype=pc-windows-gnu
1449 |             ;;
1450 |
1451 |         *)
1452 |             err "unrecognized OS type: $_ostype"
1453 |             ;;
1454 |
1455 |     esac
1456 |
1457 |     case "$_cputype" in
1458 |
1459 |         i386 | i486 | i686 | i786 | x86)
1460 |             _cputype=i686
1461 |             ;;
1462 |
1463 |         xscale | arm)
1464 |             _cputype=arm
1465 |             if [ "$_ostype" = "linux-android" ]; then
1466 |                 _ostype=linux-androideabi
1467 |             fi
1468 |             ;;
1469 |
1470 |         armv6l)
1471 |             _cputype=arm
1472 |             if [ "$_ostype" = "linux-android" ]; then
1473 |                 _ostype=linux-androideabi
1474 |             else
1475 |                 _ostype="${_ostype}eabihf"
1476 |             fi
1477 |             ;;
1478 |
1479 |         armv7l | armv8l)
1480 |             _cputype=armv7
1481 |             if [ "$_ostype" = "linux-android" ]; then
1482 |                 _ostype=linux-androideabi
1483 |             else
1484 |                 _ostype="${_ostype}eabihf"
1485 |             fi
1486 |             ;;
1487 |
1488 |         aarch64 | arm64)
1489 |             _cputype=aarch64
1490 |             ;;
1491 |
1492 |         x86_64 | x86-64 | x64 | amd64)
1493 |             _cputype=x86_64
1494 |             ;;
1495 |
1496 |         mips)
1497 |             _cputype=$(get_endianness mips '' el)
1498 |             ;;
1499 |
1500 |         mips64)
1501 |             if [ "$_bitness" -eq 64 ]; then
1502 |                 # only the n64 ABI is supported for now
1503 |                 _ostype="${_ostype}abi64"
1504 |                 _cputype=$(get_endianness mips64 '' el)
1505 |             fi
1506 |             ;;
1507 |
1508 |         ppc)
1509 |             _cputype=powerpc
1510 |             ;;
1511 |
1512 |         ppc64)
1513 |             _cputype=powerpc64
1514 |             ;;
1515 |
1516 |         ppc64le)
1517 |             _cputype=powerpc64le
1518 |             ;;
1519 |
1520 |         s390x)
1521 |             _cputype=s390x
1522 |             ;;
1523 |         riscv64)
1524 |             _cputype=riscv64gc
1525 |             ;;
1526 |         loongarch64)
1527 |             _cputype=loongarch64
1528 |             ;;
1529 |         *)
1530 |             err "unknown CPU type: $_cputype"
1531 |             ;;
1532 |     esac
1533 |
1534 |     # Detect 64-bit Linux with a 32-bit userland
1535 |     if [ "${_ostype}" = unknown-linux-gnu ] && [ "${_bitness}" -eq 32 ]; then
1536 |         case $_cputype in
1537 |             x86_64)
1538 |                 # a 32-bit executable for amd64 = x32
1539 |                 if is_host_amd64_elf; then {
1540 |                     err "x32 linux unsupported"
1541 |                 }; else
1542 |                     _cputype=i686
1543 |                 fi
1544 |                 ;;
1545 |             mips64)
1546 |                 _cputype=$(get_endianness mips '' el)
1547 |                 ;;
1548 |             powerpc64)
1549 |                 _cputype=powerpc
1550 |                 ;;
1551 |             aarch64)
1552 |                 _cputype=armv7
1553 |                 if [ "$_ostype" = "linux-android" ]; then
1554 |                     _ostype=linux-androideabi
1555 |                 else
1556 |                     _ostype="${_ostype}eabihf"
1557 |                 fi
1558 |                 ;;
1559 |             riscv64gc)
1560 |                 err "riscv64 with 32-bit userland unsupported"
1561 |                 ;;
1562 |         esac
1563 |     fi
1564 |
1565 |     # treat armv7 systems without NEON as plain arm
1566 |     if [ "$_ostype" = "unknown-linux-gnueabihf" ] && [ "$_cputype" = armv7 ]; then
1567 |         if ensure grep '^Features' /proc/cpuinfo | grep -q -v neon; then
1568 |             # At least one processor does not have NEON.
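            # (A NEON-capable core's line looks something like
            # "Features : half thumb fastmult vfp edsp neon vfpv3"; the grep
            # above succeeds only if some Features line lacks "neon".)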
1569 |             _cputype=arm
1570 |         fi
1571 |     fi
1572 |
1573 |     _arch="${_cputype}-${_ostype}"
1574 |
1575 |     RETVAL="$_arch"
1576 | }
1577 |
1578 | say() {
1579 |     if [ "0" = "$PRINT_QUIET" ]; then
1580 |         echo "$1"
1581 |     fi
1582 | }
1583 |
1584 | say_verbose() {
1585 |     if [ "1" = "$PRINT_VERBOSE" ]; then
1586 |         echo "$1"
1587 |     fi
1588 | }
1589 |
1590 | err() {
1591 |     if [ "0" = "$PRINT_QUIET" ]; then
1592 |         local red
1593 |         local reset
1594 |         red=$(tput setaf 1 2>/dev/null || echo '')
1595 |         reset=$(tput sgr0 2>/dev/null || echo '')
1596 |         say "${red}ERROR${reset}: $1" >&2
1597 |     fi
1598 |     exit 1
1599 | }
1600 |
1601 | need_cmd() {
1602 |     if ! check_cmd "$1"
1603 |     then err "need '$1' (command not found)"
1604 |     fi
1605 | }
1606 |
1607 | check_cmd() {
1608 |     command -v "$1" > /dev/null 2>&1
1609 |     return $?
1610 | }
1611 |
1612 | assert_nz() {
1613 |     if [ -z "$1" ]; then err "assert_nz $2"; fi
1614 | }
1615 |
1616 | # Run a command that should never fail. If the command fails, execution
1617 | # will immediately terminate with an error showing the failing
1618 | # command.
1619 | ensure() {
1620 |     if ! "$@"; then err "command failed: $*"; fi
1621 | }
1622 |
1623 | # This is just for indicating that commands' results are being
1624 | # intentionally ignored. Usually, because it's being executed
1625 | # as part of error handling.
1626 | ignore() {
1627 |     "$@"
1628 | }
1629 |
1630 | # This wraps curl or wget. Try curl first; if not installed,
1631 | # use wget instead.
1632 | downloader() {
1633 |     if check_cmd curl
1634 |     then _dld=curl
1635 |     elif check_cmd wget
1636 |     then _dld=wget
1637 |     else _dld='curl or wget' # to be used in error message of need_cmd
1638 |     fi
1639 |
1640 |     if [ "$1" = --check ]
1641 |     then need_cmd "$_dld"
1642 |     elif [ "$_dld" = curl ]
1643 |     then curl -sSfL "$1" -o "$2"
1644 |     elif [ "$_dld" = wget ]
1645 |     then wget "$1" -O "$2"
1646 |     else err "Unknown downloader" # should not reach here
1647 |     fi
1648 | }
1649 |
1650 | download_binary_and_run_installer "$@" || exit 1
1651 |
--------------------------------------------------------------------------------