├── .gitignore ├── LICENSE ├── README.md ├── audio ├── audio_classification │ ├── README.md │ ├── music_embedding.png │ ├── music_genre_classification.ipynb │ └── music_recog_system.png └── audio_fingerprint │ ├── README.md │ ├── audio_fingerprint_advanced.ipynb │ ├── audio_fingerprint_beginner.ipynb │ ├── audio_tn.png │ ├── demo.png │ ├── fingerprint.png │ ├── query.png │ └── storage.png ├── data_science ├── credit_card_approval_prediction │ ├── README.md │ └── credit_card_approval_prediction.ipynb └── dimension_reduction │ ├── README.md │ └── demo.ipynb ├── fine_tune ├── 0_towhee_trainer_overview.ipynb ├── 1_quick_start.ipynb ├── 2_fine_tune_with_mnist.ipynb ├── 3_train_a_bird_classification_model.ipynb ├── 4_training_configs.ipynb ├── 5_train_cub_200_2011.ipynb ├── 6_train_language_modeling_tasks │ ├── README.md │ ├── fine_tune_bert_on_masked_language_modeling.ipynb │ └── fine_tune_gpt2_on_causal_language_modeling.ipynb ├── 7_fine_tune_audio_embedding_operator.ipynb ├── 8_train_semantic_textual_similarity.ipynb ├── 9_fine_tune_video_deduplication_model.ipynb └── README.md ├── image ├── image_animation │ ├── README.md │ ├── anime_style_transformer.ipynb │ ├── example.ipynb │ ├── main.py │ ├── test.png │ └── utils.py ├── image_deduplication │ ├── Lenna.png │ ├── README.md │ ├── image_dedup.png │ ├── image_deduplication.ipynb │ └── logo.png ├── reverse_image_search │ ├── 1_build_image_search_engine.ipynb │ ├── 2_deep_dive_image_search.ipynb │ ├── README.md │ └── workflow.png ├── text_image_search │ ├── 1_build_text_image_search_engine.ipynb │ ├── 2_deep_dive_text_image_search.ipynb │ ├── 3_build_chinese_image_search_engine.ipynb │ ├── README.md │ ├── teddy.png │ ├── train │ │ ├── IMG000000.png │ │ ├── IMG000001.png │ │ ├── IMG000002.png │ │ ├── IMG000003.png │ │ └── IMG000004.png │ └── workflow.png ├── visualization │ ├── README.md │ ├── cat_and_dog.png │ ├── cls2idx.py │ ├── feder_towhee.png │ ├── towhee1.png │ ├── towhee2.png │ ├── towhee3.png │ ├── under_the_hood_anns_index.ipynb │ └── under_the_hood_embedding_models.ipynb └── yolo.ipynb ├── image_generation ├── how_to_generate_image_given_text.ipynb └── img.png ├── medical └── molecular_search │ ├── 1_build_molecular_search_engine.ipynb │ ├── 2_deep_dive_molecular_search.ipynb │ └── README.md ├── nlp ├── question_answering │ ├── 1_build_question_answering_engine.ipynb │ ├── README.md │ ├── question_answer.csv │ └── workflow.png └── text_search │ ├── README.md │ └── search_article_in_medium.ipynb ├── pipeline ├── 1.jpg ├── 2.jpg ├── 3.jpg ├── README.md ├── broken.jpg ├── concat_node.png ├── filter_node.png ├── flat_map_node.png ├── getting_started_with_pipeline.ipynb ├── map_node.png ├── time_window_node.png ├── window_all.png └── window_node.png └── video ├── deepfake_detection ├── 1_deepfake_detection.ipynb ├── README.md └── deepfake.jpg ├── reverse_video_search ├── 1_reverse_video_search_engine.ipynb ├── 2_deep_dive_reverse_video_search.ipynb ├── README.md ├── reverse_video_search.png └── tmp │ ├── Ou1w86qEr58.gif │ ├── V7DUq0JJneY.gif │ ├── bTCznQiu0hc.gif │ └── ty4UQlowp0c.gif ├── text_video_retrieval ├── 1_text_video_retrieval_engine.ipynb ├── README.md └── tmp_gifs │ ├── video7365.gif │ ├── video7579.gif │ ├── video7725.gif │ ├── video8068.gif │ └── video9258.gif ├── video_copy_detection ├── README.md ├── segment_level │ ├── example.png │ ├── tmp_gifs │ │ ├── d62ce5becff14a0c9c7dab5eea6647dc_the_wandering_earth_Wu_Jing_became_teenage_idol-1qf4y1G7gM.gif │ │ ├── 
e5dc80abd7a24b47accde190c9fdbcdc-The_Wandering_Earth_is_on_CCTV-1db411m7f8.gif │ │ └── ef65e0f662e646a88a13b6eddb640e48-News_Broadcast_on_Wandering_Earth-1xb411U7uE.gif │ ├── tmp_gifs2 │ │ ├── 0640bd5d43d1499c962e275be6b804ef-Does_MaDongmei_live_here-1e64y1y799.gif │ │ └── ad244c924f31461a9d809c77ae251ac1-the_classic_dialogue_what_is_Ma_Mei-1y7411n7y1.gif │ ├── video_decopy_insert.png │ ├── video_decopy_query.png │ └── video_deduplication_at_segment_level.ipynb └── video_level │ ├── tmp_gifs │ ├── 2bdf8029b38735a992a56e32cfc81466eea81286.gif │ ├── b61905d41276ccf2af59d4985158f8b1ce1d4990.gif │ └── e2adc784b83446ae775f698b9d17c9fd392b2f75.gif │ └── video_deduplication_at_video_level.ipynb └── video_tagging ├── README.md ├── action_classification.ipynb └── action_classification_demo.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .DS_Store 132 | 133 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Towhee Examples 2 |

3 | 4 | Logo 5 | 6 |

7 | Towhee Examples show how to analyze unstructured data with Towhee, covering tasks such as reverse image search, reverse video search, audio classification, question-answering systems, molecular search, and more. 8 |
9 |
10 | Report Bug or Request Feature 11 |

12 |

13 | 14 | 15 | 16 | ## About Towhee Examples 17 | 18 | x2vec, [Towhee](https://github.com/towhee-io/towhee) is all you need! Towhee can generate embedding vectors via a pipeline of ML models and other operations. It aims to democratize `x2vec`, allowing everyone - from beginner developers to large organizations - to generate dense embeddings with just a few lines of code (see the minimal pipeline sketch after the table below). 19 | 20 | 21 | 22 | There are many interesting examples that use Towhee to process various kinds of unstructured data, such as images, audio, and video. You can easily run these examples on your machine. 23 | 24 | ## Example List
| Category | Bootcamp | Description | Operators |
| --- | --- | --- | --- |
| Getting Started | Getting Started with Pipeline | An introduction to `Pipeline`, which helps you learn data processing pipelines with Towhee. | - |
| Image | Reverse Image Search | Search for images that are similar or related to the input image; supports many models such as ResNet, VGG, EfficientNet, ViT, etc. | Image Embedding, Timm |
| Image | Image Animation | Convert an image into an animated image. | Animegan, Cartoongan |
| Image | Image Deduplication | Find exact or near-exact duplicates within a collection of images. | Image Decode, Timm |
| Image | Text Image Search | Return images related to the description in the input query text (cross-modal retrieval). | CLIP |
| Image | Visualization | Under the hood: embedding models and ANNS indexes in image search. | Image Embedding |
| NLP | Q&A System | Process user questions and give answers through natural language technology. | Text Embedding, DPR |
| NLP | Text Search | Search for the text most similar to the query text across all data. | DPR |
| Video | Reverse Video Search | Take a video as input and search for similar videos. | Action Classification, Pytorchvideo |
| Video | Video Classification | Produce a label that is relevant to a video given its frames. | Action Classification |
| Video | Text Video Search | Search for similar or related videos with the input text. | CLIP4Clip |
| Video | Deepfake Detection | Predict the probability that a given video is fake. | Deepfake |
| Audio | Audio Classification | Categorize sounds into classes, such as ambient sound classification and speech recognition. | Audio Classification |
| Medical | Molecular Search | Search for similar molecular formulas based on the Tanimoto metric; also supports substructure and superstructure search. | RDKit |
| Data Science | Credit Card Approval Prediction | Predict whether the bank issues a credit card to the applicant; credit scores objectively quantify the magnitude of risk. | - |
| Training | Fine Tune | Tutorials on how to fine-tune models with Towhee. | Image Embedding |
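As a quick illustration of the "few lines of code" mentioned above, here is a minimal sketch of a Towhee pipeline that turns an image into a dense embedding. It assumes a recent Towhee release that provides the `pipe`/`ops` interface; the model name and the image path are placeholders, and each bootcamp above shows the exact pipelines it uses.

```python
from towhee import pipe, ops

# Minimal image-embedding pipeline: decode an image file, then embed it with a timm model.
image_embedding = (
    pipe.input('path')
        .map('path', 'img', ops.image_decode())
        .map('img', 'vec', ops.image_embedding.timm(model_name='resnet50'))
        .output('vec')
)

# Run the pipeline on a local image (placeholder path) and fetch the embedding row.
result = image_embedding('example.jpg')
vec = result.get()[0]  # the dense embedding for the input image
print(vec.shape)
```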
## Contributing 183 | 184 | Contributions to Towhee Examples are welcome from everyone. See [Guidelines for Contributing](https://github.com/towhee-io/towhee/blob/main/CONTRIBUTING.md) for details. 185 | 186 | ## Support 187 | 188 | Join the Towhee community on [Slack](https://join.slack.com/t/towheeio/shared_invite/zt-19xhoo736-PhIYh~hwOBsDSy5ZvGWJxA) to give feedback, ask for advice, and direct questions to the engineering team. You can also submit [Issues](https://github.com/towhee-io/towhee/issues) or join [Discussions](https://github.com/towhee-io/towhee/discussions). 189 | -------------------------------------------------------------------------------- /audio/audio_classification/README.md: -------------------------------------------------------------------------------- 1 | # Audio Classification 2 | 3 | Audio classification predicts a label for a given audio input. This label is a pre-defined class or category name. 4 | 5 | This audio classification bootcamp includes notebooks for different scenarios. You can learn about audio classification solutions and the basic concepts of [Towhee](https://towhee.io/) and [Milvus](https://milvus.io/) from these notebook tutorials. 6 | 7 | ## Learn from Notebook 8 | 9 | - [Music Genre Classification](./music_genre_classification.ipynb) 10 | 11 | In this notebook, you will build a basic music genre classification system with sample data (300 audio files across 10 classes), visualize predicted labels, and measure the system with performance metrics. By the end, you will be able to build a playable music genre classification system with a simple interface. 12 | -------------------------------------------------------------------------------- /audio/audio_classification/music_embedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/audio/audio_classification/music_embedding.png -------------------------------------------------------------------------------- /audio/audio_classification/music_recog_system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/audio/audio_classification/music_recog_system.png -------------------------------------------------------------------------------- /audio/audio_fingerprint/README.md: -------------------------------------------------------------------------------- 1 | # Audio Fingerprint 2 | 3 | Audio fingerprinting is the process of extracting features that represent audio as digital numbers. The process normally cuts the input audio into shorter clips of a fixed length, then converts each clip into a fingerprint piece of a fixed size. With all the small pieces ordered by timestamp, a complete fingerprint is generated for the input audio. Using audio fingerprints as identities, a system can recognize music even under various transformations. 4 | 5 | This audio fingerprint bootcamp includes notebooks for music detection using a neural network for audio fingerprinting. You can learn about music detection solutions and the basic concepts of [Towhee](https://towhee.io/) and [Milvus](https://milvus.io/) from these notebook tutorials.
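To make the fingerprinting idea above concrete, the sketch below uses the same `nnfp` audio-embedding operator shown later in this repository: it splits an input audio file into roughly one-second segments and returns one fingerprint vector per segment, so two recordings can be compared segment by segment. The file names are placeholders and the similarity score is only a naive illustration, not the retrieval logic used in the notebooks.

```python
import numpy as np
import towhee

# The nnfp operator returns a (num_segments, 128) fingerprint matrix,
# one 128-d vector per ~1 s clip of the input audio.
op = towhee.ops.audio_embedding.nnfp().get_op()

query_fp = op('query.wav')          # placeholder file
candidate_fp = op('candidate.wav')  # placeholder file

# Normalize each segment vector and compute a naive segment-to-segment similarity.
q = query_fp / np.linalg.norm(query_fp, axis=1, keepdims=True)
c = candidate_fp / np.linalg.norm(candidate_fp, axis=1, keepdims=True)
similarity = q @ c.T                # shape: (query_segments, candidate_segments)

# A rough match score: how well each query segment matches its best candidate segment.
print(similarity.max(axis=1).mean())
```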
6 | 7 | ## Learn from Notebook 8 | 9 | - [Audio Fingerprint I: Build a Demo with Towhee & Milvus](./audio_fingerprint_beginner.ipynb) 10 | 11 | In this notebook, you will build a basic music detection system using a pretrained deep learning model with sample data (100 candidate music tracks), and measure the system performance with example queries (100 noisy audio clips). At the end, you will be able to build an online music detection system with a simple user interface. 12 | 13 | - [Audio Fingerprint II: Music Detection with Temporal Localization](./audio_fingerprint_advanced.ipynb) 14 | 15 | In this notebook, you will learn about temporal localization with Towhee. With temporal localization, you can identify the overlapping ranges between two audio clips if they share any similar parts. This method can also be used as an additional post-processing step for queries in a music detection system. The notebook applies Towhee's temporal localization operator to the music detection system and evaluates the system performance. 16 | -------------------------------------------------------------------------------- /audio/audio_fingerprint/audio_tn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/audio/audio_fingerprint/audio_tn.png -------------------------------------------------------------------------------- /audio/audio_fingerprint/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/audio/audio_fingerprint/demo.png -------------------------------------------------------------------------------- /audio/audio_fingerprint/fingerprint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/audio/audio_fingerprint/fingerprint.png -------------------------------------------------------------------------------- /audio/audio_fingerprint/query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/audio/audio_fingerprint/query.png -------------------------------------------------------------------------------- /audio/audio_fingerprint/storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/audio/audio_fingerprint/storage.png -------------------------------------------------------------------------------- /data_science/credit_card_approval_prediction/README.md: -------------------------------------------------------------------------------- 1 | # Credit Card Approval Prediction 2 | 3 | Credit score cards are a common risk control method in the financial industry. They use personal information and data submitted by credit card applicants to predict the probability of future defaults and credit card borrowings, so the bank can decide whether to issue a credit card to the applicant. Credit scores can objectively quantify the magnitude of risk.
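Before diving into the notebook below, the short sketch here shows the general shape of the task: a binary classifier trained on applicant features. The CSV file and the column names are hypothetical placeholders for illustration only; the notebook walks through the real dataset and feature engineering.

```python
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# 'applications.csv' and its columns are placeholders for the applicant data.
df = pd.read_csv('applications.csv')
X = pd.get_dummies(df.drop(columns=['approved']))  # one-hot encode categorical fields
y = df['approved']                                 # 1 = card issued, 0 = rejected

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
print('test accuracy:', model.score(X_test, y_test))
```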
4 | 5 | 6 | 7 | This [notebook](credit_card_approval_prediction.ipynb) shows you how to use Towhee to predict whether the bank will issue a credit card to the applicant, and it also has a good following on [Kaggle](https://www.kaggle.com/code/chizzzy/credit-card-approval-prediction/notebook?scriptVersionId=92959791). -------------------------------------------------------------------------------- /data_science/dimension_reduction/README.md: -------------------------------------------------------------------------------- 1 | # DimReduct 2 | > Embedding dimensionality reduction and visualization for search efficiency. 3 | 4 | 5 | 6 | Dimension reduction is the transformation of data from a high-dimensional space into a low-dimensional space so that the low-dimensional representation retains some meaningful properties of the original data, ideally close to its [intrinsic dimension](https://en.wikipedia.org/wiki/Intrinsic_dimension). Embedding matching in high-dimensional spaces can be undesirable for many reasons; raw data are often [sparse](https://en.wikipedia.org/wiki/Sparse_matrix) as a consequence of the [curse of dimensionality](https://en.wikipedia.org/wiki/Curse_of_dimensionality), and analyzing the data is usually [computationally intractable](https://en.wikipedia.org/wiki/Computational_complexity_theory#Intractability) (hard to control or deal with). 7 | 8 | This [notebook](https://github.com/Sharp-rookie/examples/blob/dim_reduct/data_science/dimension_reduction/demo.ipynb) shows you how to use Towhee to reduce the dimensionality of an embedding space to accelerate embedding matching. 9 | -------------------------------------------------------------------------------- /fine_tune/1_quick_start.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true, 7 | "pycharm": { 8 | "name": "#%% md\n" 9 | } 10 | }, 11 | "source": [ 12 | "# Quick Start" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "Follow the steps below to get started with this Jupyter notebook and learn how to train a Towhee operator. This example fine-tunes a pretrained model (e.g. ResNet-18) with a fake dataset." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# 1. Setup Operator" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Create the operator and load the model by name." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 1, 39 | "metadata": { 40 | "pycharm": { 41 | "name": "#%%\n" 42 | } 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import warnings #\n", 47 | "warnings.filterwarnings(\"ignore\") #\n", 48 | "from towhee.trainer.training_config import TrainingConfig\n", 49 | "from torchvision import transforms\n", 50 | "from towhee import dataset\n", 51 | "import towhee\n", 52 | "\n", 53 | "op = towhee.ops.image_embedding.timm(model_name='resnet18', num_classes=10).get_op()" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "# 2. Configure Trainer:" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "Modify training configurations on top of default values."
68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 2, 73 | "metadata": { 74 | "pycharm": { 75 | "name": "#%%\n" 76 | } 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "# build a training config:\n", 81 | "training_config = TrainingConfig(\n", 82 | " batch_size=2,\n", 83 | " epoch_num=2,\n", 84 | " output_dir='quick_start_output'\n", 85 | ")" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "# 3. Prepare Dataset" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "The example here uses a fake dataset for both training and evaluation." 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 3, 105 | "metadata": { 106 | "pycharm": { 107 | "name": "#%%\n" 108 | } 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "# prepare the dataset\n", 113 | "fake_transform = transforms.Compose([transforms.ToTensor()])\n", 114 | "train_data = dataset('fake', size=20, transform=fake_transform)\n", 115 | "eval_data = dataset('fake', size=10, transform=fake_transform)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "# 4. Start Training\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "Now everything is ready, start training.\n", 130 | "\n" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 4, 136 | "metadata": { 137 | "pycharm": { 138 | "name": "#%%\n" 139 | } 140 | }, 141 | "outputs": [ 142 | { 143 | "name": "stderr", 144 | "output_type": "stream", 145 | "text": [ 146 | "2023-02-14 16:35:55,853 - 139855987099456 - trainer.py-trainer:319 - WARNING: TrainingConfig(output_dir='quick_start_output', overwrite_output_dir=True, eval_strategy='epoch', eval_steps=None, batch_size=2, val_batch_size=-1, seed=42, epoch_num=2, dataloader_pin_memory=True, dataloader_drop_last=True, dataloader_num_workers=0, lr=5e-05, metric='Accuracy', print_steps=None, load_best_model_at_end=False, early_stopping={'monitor': 'eval_epoch_metric', 'patience': 4, 'mode': 'max'}, model_checkpoint={'every_n_epoch': 1}, tensorboard={'log_dir': None, 'comment': ''}, loss='CrossEntropyLoss', optimizer='Adam', lr_scheduler_type='linear', warmup_ratio=0.0, warmup_steps=0, device_str=None, sync_bn=False, freeze_bn=False)\n" 147 | ] 148 | }, 149 | { 150 | "data": { 151 | "application/vnd.jupyter.widget-view+json": { 152 | "model_id": "372d50ebdd604183994ad3f5e236424c", 153 | "version_major": 2, 154 | "version_minor": 0 155 | }, 156 | "text/plain": [ 157 | " 0%| | 0/10 [00:00" 279 | ] 280 | }, 281 | "metadata": { 282 | "needs_background": "light" 283 | }, 284 | "output_type": "display_data" 285 | }, 286 | { 287 | "name": "stdout", 288 | "output_type": "stream", 289 | "text": [ 290 | "this picture is number 2\n" 291 | ] 292 | } 293 | ], 294 | "source": [ 295 | "import matplotlib.pyplot as plt\n", 296 | "import torch\n", 297 | "import random\n", 298 | "\n", 299 | "# get random picture and predict it.\n", 300 | "img_index = random.randint(0, len(eval_data))\n", 301 | "img = eval_data.dataset[img_index][0]\n", 302 | "img = img.numpy().transpose(1, 2, 0) # (C, H, W) -> (H, W, C)\n", 303 | "pil_img = img * std + mean\n", 304 | "plt.imshow(pil_img)\n", 305 | "plt.show()\n", 306 | "test_img = eval_data.dataset[img_index][0].unsqueeze(0).to(op.trainer.configs.device)\n", 307 | "out = op.trainer.predict(test_img)\n", 308 | "predict_num = torch.argmax(torch.softmax(out, dim=-1)).item()\n", 
309 | "print('this picture is number {}'.format(predict_num))" 310 | ] 311 | } 312 | ], 313 | "metadata": { 314 | "kernelspec": { 315 | "display_name": "Python 3", 316 | "language": "python", 317 | "name": "python3" 318 | }, 319 | "language_info": { 320 | "codemirror_mode": { 321 | "name": "ipython", 322 | "version": 3 323 | }, 324 | "file_extension": ".py", 325 | "mimetype": "text/x-python", 326 | "name": "python", 327 | "nbconvert_exporter": "python", 328 | "pygments_lexer": "ipython3", 329 | "version": "3.8.8" 330 | } 331 | }, 332 | "nbformat": 4, 333 | "nbformat_minor": 1 334 | } -------------------------------------------------------------------------------- /fine_tune/4_training_configs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training Configs\n", 8 | "You can always set up training configs directly in python scripts or with a yaml file. Refer to TrainingConfig for more API details.\n", 9 | "\n", 10 | "# 1. Default Configs\n", 11 | "You can dump default training configs or write customized training configs to a yaml file.\n", 12 | "\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import warnings #\n", 22 | "warnings.filterwarnings(\"ignore\") #\n", 23 | "from towhee.trainer.training_config import dump_default_yaml, TrainingConfig\n", 24 | "default_config_file = 'default_training_configs.yaml'\n", 25 | "dump_default_yaml(default_config_file)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "You can open default_training_configs.yaml, and you can get the default config yaml structure like this:\n", 33 | "```yaml\n", 34 | "train:\n", 35 | " output_dir: ./output_dir\n", 36 | " overwrite_output_dir: true\n", 37 | " eval_strategy: epoch\n", 38 | " eval_steps:\n", 39 | " batch_size: 8\n", 40 | " val_batch_size: -1\n", 41 | " seed: 42\n", 42 | " epoch_num: 2\n", 43 | " dataloader_pin_memory: true\n", 44 | " dataloader_drop_last: true\n", 45 | " dataloader_num_workers: 0\n", 46 | " load_best_model_at_end: false\n", 47 | " freeze_bn: false\n", 48 | "device:\n", 49 | " device_str:\n", 50 | " sync_bn: false\n", 51 | "logging:\n", 52 | " print_steps:\n", 53 | "learning:\n", 54 | " lr: 5e-05\n", 55 | " loss: CrossEntropyLoss\n", 56 | " optimizer: Adam\n", 57 | " lr_scheduler_type: linear\n", 58 | " warmup_ratio: 0.0\n", 59 | " warmup_steps: 0\n", 60 | "callback:\n", 61 | " early_stopping:\n", 62 | " monitor: eval_epoch_metric\n", 63 | " patience: 4\n", 64 | " mode: max\n", 65 | " model_checkpoint:\n", 66 | " every_n_epoch: 1\n", 67 | " tensorboard:\n", 68 | " log_dir:\n", 69 | " comment: ''\n", 70 | "metrics:\n", 71 | " metric: Accuracy\n", 72 | "```\n", 73 | "So the yaml file is corresponding to the TrainingConfig instance." 
74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 2, 79 | "metadata": { 80 | "pycharm": { 81 | "name": "#%%\n" 82 | } 83 | }, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "TrainingConfig(output_dir='./output_dir', overwrite_output_dir=True, eval_strategy='epoch', eval_steps=None, batch_size=8, val_batch_size=-1, seed=42, epoch_num=2, dataloader_pin_memory=True, dataloader_drop_last=True, dataloader_num_workers=0, lr=5e-05, metric='Accuracy', print_steps=None, load_best_model_at_end=False, early_stopping={'monitor': 'eval_epoch_metric', 'patience': 4, 'mode': 'max'}, model_checkpoint={'every_n_epoch': 1}, tensorboard={'log_dir': None, 'comment': ''}, loss='CrossEntropyLoss', optimizer='Adam', lr_scheduler_type='linear', warmup_ratio=0.0, warmup_steps=0, device_str=None, freeze_bn=False)\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "training_configs = TrainingConfig().load_from_yaml(default_config_file)\n", 95 | "print(training_configs)\n", 96 | "training_configs.output_dir = 'my_test_output'\n", 97 | "training_configs.save_to_yaml('my_test_config.yaml')" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "Open my_test_config.yaml, and you will find `output_dir` is modified:\n", 105 | "```yaml\n", 106 | "train:\n", 107 | " output_dir: my_test_output\n", 108 | "```\n", 109 | "So there are 2 ways to set up the configs. One is using by class `TrainingConfig`, another is to overwrite the yaml file.\n", 110 | "\n", 111 | "# 2.Setting by TrainingConfig\n", 112 | "It's easy to set config using the TrainingConfig class. Just set the fields in TrainingConfig instance.\n", 113 | "You can get each config field introduction easily by `get_config_help()`." 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 3, 119 | "metadata": { 120 | "pycharm": { 121 | "name": "#%%\n" 122 | } 123 | }, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "- output_dir \n", 130 | " default: ./output_dir \n", 131 | " metadata_dict: {'help': 'The output directory where the model predictions and checkpoints will be written.', 'category': 'train'} \n", 132 | "\n", 133 | "- overwrite_output_dir \n", 134 | " default: True \n", 135 | " metadata_dict: {'help': 'Overwrite the content of the output directory.Use this to continue training if output_dir points to a checkpoint directory.', 'category': 'train'} \n", 136 | "\n", 137 | "- eval_strategy \n", 138 | " default: epoch \n", 139 | " metadata_dict: {'help': 'The evaluation strategy. 
It can be `steps`, `epoch`, `eval_epoch` or `no`,', 'category': 'train'} \n", 140 | "\n", 141 | "- eval_steps \n", 142 | " default: None \n", 143 | " metadata_dict: {'help': 'Run an evaluation every X steps.', 'category': 'train'} \n", 144 | "\n", 145 | "- batch_size \n", 146 | " default: 8 \n", 147 | " metadata_dict: {'help': 'Batch size for training.', 'category': 'train'} \n", 148 | "\n", 149 | "- val_batch_size \n", 150 | " default: -1 \n", 151 | " metadata_dict: {'help': 'Batch size for evaluation.', 'category': 'train'} \n", 152 | "\n", 153 | "- seed \n", 154 | " default: 42 \n", 155 | " metadata_dict: {'help': 'Random seed that will be set at the beginning of training.', 'category': 'train'} \n", 156 | "\n", 157 | "- epoch_num \n", 158 | " default: 2 \n", 159 | " metadata_dict: {'help': 'Total number of training epochs to perform.', 'category': 'train'} \n", 160 | "\n", 161 | "- dataloader_pin_memory \n", 162 | " default: True \n", 163 | " metadata_dict: {'help': 'Whether or not to pin memory for DataLoader.', 'category': 'train'} \n", 164 | "\n", 165 | "- dataloader_drop_last \n", 166 | " default: True \n", 167 | " metadata_dict: {'help': 'Drop the last incomplete batch if it is not divisible by the batch size.', 'category': 'train'} \n", 168 | "\n", 169 | "- dataloader_num_workers \n", 170 | " default: 0 \n", 171 | " metadata_dict: {'help': 'Number of subprocesses to use for data loading. Default 0 means that the data will be loaded in the main process. -1 means using all the cpu kernels, it will greatly improve the speed when distributed training.', 'category': 'train'} \n", 172 | "\n", 173 | "- lr \n", 174 | " default: 5e-05 \n", 175 | " metadata_dict: {'help': 'The initial learning rate.', 'category': 'learning'} \n", 176 | "\n", 177 | "- metric \n", 178 | " default: Accuracy \n", 179 | " metadata_dict: {'help': 'The metric to use to compare two different models.', 'category': 'metrics'} \n", 180 | "\n", 181 | "- print_steps \n", 182 | " default: None \n", 183 | " metadata_dict: {'help': 'If None, use the tqdm progress bar, otherwise it will print the logs on the screen every `print_steps`', 'category': 'logging'} \n", 184 | "\n", 185 | "- load_best_model_at_end \n", 186 | " default: False \n", 187 | " metadata_dict: {'help': 'Whether or not to load the best model found during training at the end of training.', 'category': 'train'} \n", 188 | "\n", 189 | "- early_stopping \n", 190 | " default: \n", 191 | " metadata_dict: {'help': 'If the metrics is not better than before in several epoch, it will stop the training.', 'category': 'callback'} \n", 192 | "\n", 193 | "- model_checkpoint \n", 194 | " default: \n", 195 | " metadata_dict: {'help': 'How many n epoch to save checkpoints', 'category': 'callback'} \n", 196 | "\n", 197 | "- tensorboard \n", 198 | " default: \n", 199 | " metadata_dict: {'help': 'Tensorboard.', 'category': 'callback'} \n", 200 | "\n", 201 | "- loss \n", 202 | " default: CrossEntropyLoss \n", 203 | " metadata_dict: {'help': 'Pytorch loss in torch.nn package', 'category': 'learning'} \n", 204 | "\n", 205 | "- optimizer \n", 206 | " default: Adam \n", 207 | " metadata_dict: {'help': 'Pytorch optimizer Class name in torch.optim package', 'category': 'learning'} \n", 208 | "\n", 209 | "- lr_scheduler_type \n", 210 | " default: linear \n", 211 | " metadata_dict: {'help': 'The scheduler type to use.eg. 
`linear`, `cosine`, `cosine_with_restarts`, `polynomial`, `constant`, `constant_with_warmup`', 'category': 'learning'} \n", 212 | "\n", 213 | "- warmup_ratio \n", 214 | " default: 0.0 \n", 215 | " metadata_dict: {'help': 'Linear warmup over warmup_ratio fraction of total steps.', 'category': 'learning'} \n", 216 | "\n", 217 | "- warmup_steps \n", 218 | " default: 0 \n", 219 | " metadata_dict: {'help': 'Linear warmup over warmup_steps.', 'category': 'learning'} \n", 220 | "\n", 221 | "- device_str \n", 222 | " default: None \n", 223 | " metadata_dict: {'help': 'None -> If there is a cuda env in the machine, it will use cuda:0, else cpu.\\n`cpu` -> Use cpu only.\\n`cuda:2` -> Use the No.2 gpu, the same for other numbers.\\n`cuda` -> Use all available gpu, using data parallel. If you want to use several specified gpus to run, you can specify the environment variable `CUDA_VISIBLE_DEVICES` as the number of gpus you need before running your training script.', 'category': 'device'} \n", 224 | "\n", 225 | "- freeze_bn \n", 226 | " default: False \n", 227 | " metadata_dict: {'help': 'will completely freeze all BatchNorm layers during training.', 'category': 'train'} \n", 228 | "\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "from towhee.trainer.training_config import get_config_help\n", 234 | "help_dict = get_config_help() # get config field introductions." 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": { 240 | "pycharm": { 241 | "name": "#%% md\n" 242 | } 243 | }, 244 | "source": [ 245 | "You can construct config by the construct function, or then modify you custom value.\n", 246 | "```python\n", 247 | "training_configs = TrainingConfig(\n", 248 | " xxx='some_value_xxx',\n", 249 | " yyy='some_value_yyy'\n", 250 | ")\n", 251 | "# or\n", 252 | "training_configs.aaa='some_value_aaa'\n", 253 | "training_configs.bbb='some_value_bbb'\n", 254 | "```" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": { 260 | "pycharm": { 261 | "name": "#%% md\n" 262 | } 263 | }, 264 | "source": [ 265 | "# 3.Setting by yaml file\n", 266 | "Your yaml file can be briefly with just some lines. You need not write the whole setting.\n", 267 | "```yaml\n", 268 | "train:\n", 269 | " output_dir: my_another_output\n", 270 | "```\n", 271 | "A yaml like this also works. 
Default values will be overwritten if not written.\n", 272 | "There are some point you should pay attention.\n", 273 | "- If a value is None in python, no value is required after the colon.\n", 274 | "- If the value is `True`/`False` in python, it's `true`/`false` in yaml.\n", 275 | "- If the field is `str` instance in python, no quotation marks required.\n", 276 | "- If the field value is `dict` instance in python, start another line after the colon, each line after that is each key-value pair info.\n", 277 | "```yaml\n", 278 | " early_stopping:\n", 279 | " monitor: eval_epoch_metric\n", 280 | " patience: 4\n", 281 | " mode: max\n", 282 | "```\n", 283 | "equals\n", 284 | "```python\n", 285 | "early_stopping = {\n", 286 | " 'monitor': 'eval_epoch_metric',\n", 287 | " 'patience': 4,\n", 288 | " 'mode': 'max'\n", 289 | " }\n", 290 | "```\n", 291 | "in python.\n" 292 | ] 293 | } 294 | ], 295 | "metadata": { 296 | "kernelspec": { 297 | "display_name": "Python 3", 298 | "language": "python", 299 | "name": "python3" 300 | }, 301 | "language_info": { 302 | "codemirror_mode": { 303 | "name": "ipython", 304 | "version": 3 305 | }, 306 | "file_extension": ".py", 307 | "mimetype": "text/x-python", 308 | "name": "python", 309 | "nbconvert_exporter": "python", 310 | "pygments_lexer": "ipython3", 311 | "version": "3.8.8" 312 | } 313 | }, 314 | "nbformat": 4, 315 | "nbformat_minor": 1 316 | } 317 | -------------------------------------------------------------------------------- /fine_tune/6_train_language_modeling_tasks/README.md: -------------------------------------------------------------------------------- 1 | ### What is language modeling? 2 | Language modeling (LM) is the use of various statistical and probabilistic techniques to determine the probability of a given sequence of words occurring in a sentence. It is the task of fitting a model to a corpus, which can be domain specific. Language models analyze bodies of text data to provide a basis for their word predictions. They are widely 3 | used in natural language processing (NLP) applications, particularly ones that generate text as an output. 4 | 5 | Most popular transformer-based models are trained using a variant of language modeling, e.g. BERT with masked language modeling, GPT-2 with causal language modeling. And they are two basic language modeling tasks. 6 | 7 | 8 | ### Masked Language Modeling 9 | 10 | Masked language modeling is the task of masking tokens in a sequence with a masking token, and prompting the model to fill that mask with an appropriate token. This allows the model to attend to both the right context (tokens on the right of the mask) and the left context (tokens on the left of the mask). Such a training creates a strong basis for downstream tasks requiring bi-directional context, such as question answering. 11 | 12 | ### Causal Language Modeling 13 | Causal language modeling is the task of predicting the token following a sequence of tokens. In this situation, the model only attends to the left context (tokens on the left of the mask). Such a training is particularly interesting for generation tasks. Usually, the next token is predicted by sampling from the logits of the last hidden state the model produces from the input sequence. 14 | 15 | Language modeling can be useful outside of pretraining as well, for example to shift the model distribution to be domain-specific: using a language model trained over a very large corpus, and then fine-tuning it to a news dataset or on scientific papers. 
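As a concrete reference for the two objectives described above, here is a minimal masked-language-modeling fine-tuning sketch using the Hugging Face `transformers` Trainer (the same backend these notebooks rely on). It is a generic illustration rather than the exact recipe in the notebooks; the corpus file, model name, and hyperparameters are placeholders, and the causal variant swaps in `AutoModelForCausalLM` with `mlm=False` in the collator.

```python
from datasets import load_dataset
from transformers import (AutoModelForMaskedLM, AutoTokenizer,
                          DataCollatorForLanguageModeling, Trainer, TrainingArguments)

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModelForMaskedLM.from_pretrained('bert-base-uncased')

# 'corpus.txt' is a placeholder: one text sample per line, e.g. a domain-specific corpus.
raw = load_dataset('text', data_files={'train': 'corpus.txt'})
tokenized = raw.map(lambda ex: tokenizer(ex['text'], truncation=True, max_length=128),
                    batched=True, remove_columns=['text'])

# The collator randomly masks 15% of tokens; the model learns to reconstruct them.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=True, mlm_probability=0.15)

trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir='mlm_output', num_train_epochs=1,
                           per_device_train_batch_size=8),
    train_dataset=tokenized['train'],
    data_collator=collator,
)
trainer.train()
```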
16 | 17 | ### Start to train 18 | These notebooks will teach you how to fine-tune a [towhee transformers operator](https://towhee.io/text-embedding/transformers) in both language modeling tasks using [hugging face transformer Trainer](https://huggingface.co/docs/transformers/main_classes/trainer) backend. 19 | - For Masked Language Modeling, go to [fine_tune_bert_on_masked_language_modeling.ipynb](./fine_tune_bert_on_masked_language_modeling.ipynb) 20 | - For Causal Language Modeling, go to [fine_tune_gpt2_on_causal_language_modeling.ipynb](./fine_tune_gpt2_on_causal_language_modeling.ipynb) 21 | -------------------------------------------------------------------------------- /fine_tune/7_fine_tune_audio_embedding_operator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "b4cee136", 6 | "metadata": {}, 7 | "source": [ 8 | "## Preparation\n", 9 | "\n", 10 | "### Install Dependencies\n", 11 | "\n", 12 | "First we need to install dependencies to support operator training and inference." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "id": "d0b9ef7d", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "! python -m pip install torch torchvision torchaudio torchmetrics==0.7.0 towhee towhee.models>=0.8.0" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "deb511ec", 28 | "metadata": {}, 29 | "source": [ 30 | "### Download dataset\n", 31 | "This op is trained on the [FMA dataset](https://github.com/mdeff/fma). We need to fine-tune on the [gtzan dataset](https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification). In addition to downloading the gtzan dataset, we also need to download several datasets to add noise during training. They are [Microphone impulse response dataset](http://micirp.blogspot.com/), [Aachen Impulse Response Database](https://www.iks.rwth-aachen.de/en/research/tools-downloads/databases/aachen-impulse-response-database/), and [AudioSet](https://research.google.com/audioset/download.html). These datasets are all publicly available, please contact us if there are any copyright issues.\n", 32 | "\n", 33 | "We have followed this [guide](https://github.com/stdio2016/pfann#prepare-dataset) for data preprocessing. All you need to do is directly download these processed data and information.\n", 34 | "\n", 35 | "Your can create a folder to store all the downloaded data, and it needs about 4G space." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "id": "142e3b5a", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import os\n", 46 | "dataset_path = './dataset'\n", 47 | "if not os.path.exists(dataset_path):\n", 48 | " os.mkdir(dataset_path)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "a344485d", 54 | "metadata": {}, 55 | "source": [ 56 | "#### Download gtzan dataset\n", 57 | "The [gtzan dataset](https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification) contains 1000 tracks of 30 second length. There are 10 genres, each containing 100 tracks which are all 22050Hz Mono 16-bit audio files in .wav format. 
" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "id": "e9f68250", 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 71 | " Dload Upload Total Spent Left Speed\n", 72 | " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", 73 | "100 1168M 100 1168M 0 0 4129k 0 0:04:49 0:04:49 --:--:-- 4694k\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "! curl -L https://github.com/towhee-io/examples/releases/download/data/gtzan_full.zip -o ./dataset/genres_original.zip\n", 79 | "! unzip -q -o ./dataset/genres_original.zip -d ./dataset\n", 80 | "! rm ./dataset/genres_original.zip" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "id": "51d6933b", 86 | "metadata": {}, 87 | "source": [ 88 | "#### Download Microphone impulse response dataset\n", 89 | "[Microphone impulse response dataset](http://micirp.blogspot.com/) contains the specially recorded microphone impulse response data, which can be used to adding noise during training." 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 4, 95 | "id": "4c0f31b3", 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 103 | " Dload Upload Total Spent Left Speed\n", 104 | " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", 105 | "100 152k 100 152k 0 0 83217 0 0:00:01 0:00:01 --:--:-- 1456k\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "! curl -L https://github.com/towhee-io/examples/releases/download/data/micirp.zip -o ./dataset/micirp.zip\n", 111 | "! unzip -q -o ./dataset/micirp.zip -d ./dataset\n", 112 | "! rm ./dataset/micirp.zip" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "id": "756b33d5", 118 | "metadata": {}, 119 | "source": [ 120 | "#### Download Aachen Impulse Response Database\n", 121 | "[Aachen Impulse Response Database](https://www.iks.rwth-aachen.de/en/research/tools-downloads/databases/aachen-impulse-response-database/) is a set of impulse responses that were measured in a wide variety of rooms. It can be used for adding noise during training." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 5, 127 | "id": "0ac55bd9", 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 135 | " Dload Upload Total Spent Left Speed\n", 136 | " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", 137 | "100 193M 100 193M 0 0 3854k 0 0:00:51 0:00:51 --:--:-- 4098k\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "! curl -L https://github.com/towhee-io/examples/releases/download/data/AIR_1_4.zip -o ./dataset/AIR_1_4.zip\n", 143 | "! unzip -q -o ./dataset/AIR_1_4.zip -d ./dataset\n", 144 | "! rm ./dataset/AIR_1_4.zip" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "id": "e8680aea", 150 | "metadata": {}, 151 | "source": [ 152 | "#### Download AudioSet\n", 153 | "[AudioSet](https://research.google.com/audioset/download.html) is an audio event dataset, which consists of over 2M human-annotated 10-second video clips. We also use it for adding noise during training." 
154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 6, 159 | "id": "15ca989c", 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 167 | " Dload Upload Total Spent Left Speed\n", 168 | " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", 169 | "100 1500M 100 1500M 0 0 3133k 0 0:08:10 0:08:10 --:--:-- 4729k8:41 0:06:51 0:01:50 2320k\n", 170 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 171 | " Dload Upload Total Spent Left Speed\n", 172 | " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", 173 | "100 582M 100 582M 0 0 2620k 0 0:03:47 0:03:47 --:--:-- 2613k\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "! curl -L https://github.com/towhee-io/examples/releases/download/data/audioset_p1 -o ./dataset/audioset_p1\n", 179 | "! curl -L https://github.com/towhee-io/examples/releases/download/data/audioset_p2 -o ./dataset/audioset_p2\n", 180 | "! cat ./dataset/audioset_p* > ./dataset/audioset.zip\n", 181 | "! unzip -q -o ./dataset/audioset.zip -d ./dataset\n", 182 | "! rm ./dataset/audioset_p* ./dataset/audioset.zip" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "id": "8ab69b3f", 188 | "metadata": {}, 189 | "source": [ 190 | "#### Download data information for training\n", 191 | "This is some data information that has been preprocessed for training." 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 7, 197 | "id": "3f6aa31d", 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 205 | " Dload Upload Total Spent Left Speed\n", 206 | " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", 207 | "100 25286 100 25286 0 0 14237 0 0:00:01 0:00:01 --:--:-- 355k\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "! curl -L https://github.com/towhee-io/examples/releases/download/data/gtzan_info.zip -o ./gtzan_info.zip\n", 213 | "! unzip -q -o ./gtzan_info.zip\n", 214 | "! rm ./gtzan_info.zip" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "id": "cebb5f4c", 220 | "metadata": {}, 221 | "source": [ 222 | "## Fine-tune Audio Embedding with Neural Network Fingerprint operator" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "id": "b5e5f3fd", 228 | "metadata": {}, 229 | "source": [ 230 | "### Instantiate operator\n", 231 | "We can instantiate a towhee nnfp operator. This audio embedding operator converts an input audio into a dense vector which can be used to represent the audio clip's semantics. Each vector represents for an audio clip with a fixed length of around 1s. This operator generates audio embeddings with fingerprinting method introduced by Neural Audio Fingerprint. The nnfp operator is suitable for audio fingerprinting. 
" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 8, 237 | "id": "84376759", 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "import towhee\n", 242 | "nnfp_op = towhee.ops.audio_embedding.nnfp().get_op()" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 9, 248 | "id": "f2fa8a5d", 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/plain": [ 254 | "(array([[-0.15469249, -0.02260398, -0.05088959, ..., 0.14650534,\n", 255 | " 0.04951884, -0.04235527],\n", 256 | " [-0.00608123, -0.06859994, -0.0750239 , ..., 0.0840608 ,\n", 257 | " 0.12196919, -0.1123263 ],\n", 258 | " [-0.18665867, 0.08474724, 0.03795987, ..., 0.06031123,\n", 259 | " -0.09239668, -0.08622654],\n", 260 | " ...,\n", 261 | " [ 0.02841254, 0.01915257, 0.02964114, ..., 0.04307787,\n", 262 | " -0.08863434, 0.0016751 ],\n", 263 | " [-0.0166699 , 0.08893833, 0.05510458, ..., 0.13624884,\n", 264 | " 0.03493905, -0.13401009],\n", 265 | " [-0.04592355, -0.07944845, 0.09267115, ..., 0.02575601,\n", 266 | " -0.09419111, 0.03918429]], dtype=float32),\n", 267 | " (10, 128))" 268 | ] 269 | }, 270 | "execution_count": 9, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "test_audio = 'dataset/audioset/B4lyT64WFjc_0.wav'\n", 277 | "embedding = nnfp_op(test_audio)\n", 278 | "embedding, embedding.shape" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "id": "3524531f", 284 | "metadata": {}, 285 | "source": [ 286 | "### Start training\n", 287 | "When initialized, this operator already contains the model with weights trained on the FMA data. The goal of our training is to fine-tune it on another audio dataset domain to better fit the new data distribution. \n", 288 | "\n", 289 | "We can first look at the default training configuration. 
" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 10, 295 | "id": "41671a97", 296 | "metadata": {}, 297 | "outputs": [ 298 | { 299 | "name": "stdout", 300 | "output_type": "stream", 301 | "text": [ 302 | "{\r\n", 303 | " \"train_csv\": \"gtzan_info/lists/gtzan_train.csv\",\r\n", 304 | " \"validate_csv\": \"gtzan_info/lists/gtzan_valtest.csv\",\r\n", 305 | " \"test_csv\": \"gtzan_info/lists/gtzan_valtest.csv\",\r\n", 306 | " \"music_dir\": \"dataset/genres_original\",\r\n", 307 | " \"model_dir\": \"fma_test\",\r\n", 308 | " \"cache_dir\": \"caches\",\r\n", 309 | " \"batch_size\": 640,\r\n", 310 | " \"shuffle_size\": null,\r\n", 311 | " \"fftconv_n\": 32768,\r\n", 312 | " \"sample_rate\": 8000,\r\n", 313 | " \"stft_n\": 1024,\r\n", 314 | " \"stft_hop\": 256,\r\n", 315 | " \"n_mels\": 256,\r\n", 316 | " \"f_min\": 300,\r\n", 317 | " \"f_max\": 4000,\r\n", 318 | " \"segment_size\": 1,\r\n", 319 | " \"hop_size\": 0.5,\r\n", 320 | " \"time_offset\": 1.2,\r\n", 321 | " \"pad_start\": 0,\r\n", 322 | " \"epoch\": 1,\r\n", 323 | " \"lr\": 1e-4,\r\n", 324 | " \"tau\": 0.05,\r\n", 325 | " \"noise\": {\r\n", 326 | " \"train\": \"gtzan_info/lists/noise_train.csv\",\r\n", 327 | " \"validate\": \"gtzan_info/lists/noise_val.csv\",\r\n", 328 | " \"dir\": \"dataset/audioset\",\r\n", 329 | " \"snr_max\": 10,\r\n", 330 | " \"snr_min\": 0\r\n", 331 | " },\r\n", 332 | " \"micirp\": {\r\n", 333 | " \"train\": \"gtzan_info/lists/micirp_train.csv\",\r\n", 334 | " \"validate\": \"gtzan_info/lists/micirp_val.csv\",\r\n", 335 | " \"dir\": \"dataset/micirp\",\r\n", 336 | " \"length\": 0.5\r\n", 337 | " },\r\n", 338 | " \"air\": {\r\n", 339 | " \"train\": \"gtzan_info/lists/air_train.csv\",\r\n", 340 | " \"validate\": \"gtzan_info/lists/air_val.csv\",\r\n", 341 | " \"dir\": \"dataset/AIR_1_4\",\r\n", 342 | " \"length\": 1\r\n", 343 | " },\r\n", 344 | " \"cutout_min\": 0.1,\r\n", 345 | " \"cutout_max\": 0.5,\r\n", 346 | " \"model\": {\r\n", 347 | " \"d\": 128,\r\n", 348 | " \"h\": 1024,\r\n", 349 | " \"u\": 32,\r\n", 350 | " \"fuller\": true,\r\n", 351 | " \"conv_activation\": \"ReLU\"\r\n", 352 | " },\r\n", 353 | " \"indexer\": {\r\n", 354 | " \"index_factory\": \"IVF200,PQ64x8np\",\r\n", 355 | " \"top_k\": 100,\r\n", 356 | " \"frame_shift_mul\": 1\r\n", 357 | " }\r\n", 358 | "}\r\n" 359 | ] 360 | } 361 | ], 362 | "source": [ 363 | "! cat ./gtzan_info/default_gtzan.json" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "id": "473e06e1", 369 | "metadata": {}, 370 | "source": [ 371 | "This json contains some training configurations such as epoch, batch size, etc., as well as some data and model information. \n", 372 | "There are some csv file paths in this json, which contain the audio data information of the corresponding data set. \n", 373 | "We only need to pass this file path to the `train()` interface to train this operator." 
374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 11, 379 | "id": "64cc63e7", 380 | "metadata": {}, 381 | "outputs": [ 382 | { 383 | "name": "stdout", 384 | "output_type": "stream", 385 | "text": [ 386 | "loading noise dataset\n" 387 | ] 388 | }, 389 | { 390 | "name": "stderr", 391 | "output_type": "stream", 392 | "text": [ 393 | "100%|██████████| 1193/1193 [00:15<00:00, 74.76it/s]\n" 394 | ] 395 | }, 396 | { 397 | "name": "stdout", 398 | "output_type": "stream", 399 | "text": [ 400 | "torch.Size([95161077])\n", 401 | "loading Aachen IR dataset\n", 402 | "loading microphone IR dataset\n", 403 | "load cached music from caches/1gtzan_train.bin\n", 404 | "training data contains 47200 samples\n", 405 | "loading noise dataset\n" 406 | ] 407 | }, 408 | { 409 | "name": "stderr", 410 | "output_type": "stream", 411 | "text": [ 412 | "100%|██████████| 299/299 [00:03<00:00, 75.24it/s]\n" 413 | ] 414 | }, 415 | { 416 | "name": "stdout", 417 | "output_type": "stream", 418 | "text": [ 419 | "torch.Size([23878784])\n", 420 | "loading Aachen IR dataset\n", 421 | "loading microphone IR dataset\n", 422 | "load cached music from caches/1gtzan_valtest.bin\n", 423 | "evaluate before fine-tune...\n", 424 | "\n", 425 | "validate score: 0.805910\n" 426 | ] 427 | }, 428 | { 429 | "name": "stderr", 430 | "output_type": "stream", 431 | "text": [ 432 | "2023-01-13 11:42:46,713 - 140227902603840 - trainer.py-trainer:319 - WARNING: TrainingConfig(output_dir='fine_tune_output', overwrite_output_dir=True, eval_strategy='epoch', eval_steps=None, batch_size=640, val_batch_size=-1, seed=42, epoch_num=1, dataloader_pin_memory=True, dataloader_drop_last=True, dataloader_num_workers=0, lr=0.0001, metric='Accuracy', print_steps=None, load_best_model_at_end=False, early_stopping={'monitor': 'eval_epoch_metric', 'patience': 4, 'mode': 'max'}, model_checkpoint={'every_n_epoch': 1}, tensorboard={'log_dir': None, 'comment': ''}, loss='CrossEntropyLoss', optimizer='custom_', lr_scheduler_type='cosine', warmup_ratio=0.0, warmup_steps=0, device_str=None, sync_bn=False, freeze_bn=False)\n" 433 | ] 434 | }, 435 | { 436 | "data": { 437 | "application/vnd.jupyter.widget-view+json": { 438 | "model_id": "2f1ddfa67061446394cfe1003b553e5e", 439 | "version_major": 2, 440 | "version_minor": 0 441 | }, 442 | "text/plain": [ 443 | " 0%| | 0/148 [00:00/api/trans/) and pass the following inputs. 12 | 13 | Input:
14 |  - Image Byte: Passed as a file upload
15 |   The image to which the AnimeGAN model will be applied
16 |
17 |  - model_name: Passed as a form field in the request
18 |   Specifies the weights to be used for inference.
19 |   Supports 'celeba', 'facepaintv1', 'facepaitv2', 'hayao', 'paprika', 'shinkai' 20 |
21 |
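Below is a hedged sketch of calling the endpoint from Python. It assumes the FastAPI app in `main.py` is running locally on uvicorn's default port (8000), and `input.jpg` is a placeholder for any local image you want to stylize.

```python
import requests

# Upload an image and pick the 'hayao' weights; the field names match main.py.
with open('input.jpg', 'rb') as f:
    resp = requests.post(
        'http://127.0.0.1:8000/api/trans',
        files={'file': f},             # the image, sent as a file upload
        data={'model_name': 'hayao'},  # one of the supported model names listed above
    )

# The response body is the transformed image returned as PNG bytes.
with open('output.png', 'wb') as out:
    out.write(resp.content)
```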
22 | 23 | The API returns a byte object of the transformed image as a response. 24 | 25 | Returns:
26 | Returns a byte object of the transformed image after applying the animegan model 27 | 28 | Note: A sample Use case is shown in test.ipynb notebook. -------------------------------------------------------------------------------- /image/image_animation/main.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | import uvicorn 14 | from fastapi import FastAPI, File, UploadFile, Form, status, HTTPException 15 | from io import BytesIO 16 | import towhee 17 | import utils 18 | from starlette.responses import StreamingResponse 19 | 20 | app = FastAPI() 21 | 22 | @app.post("/api/trans") 23 | async def transform_api(file: UploadFile = File(...), model_name: str = Form(...)): 24 | extension = file.filename.split(".")[-1].lower() in ("jpg", "jpeg", "png") 25 | if not extension: 26 | raise HTTPException(status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE, 27 | detail=f'File {file.filename} should be jpg, jpeg or png') 28 | 29 | if model_name.lower() not in ['celeba', 'facepaintv1', 'facepaitv2', 'hayao', 'paprika', 'shinkai']: 30 | return f"Specified Model: {model_name} Name Does not exist" 31 | 32 | input_image = utils.read_image(file.file.read()) 33 | file.file.close() 34 | output_image = utils.translate_image(input_image, model_name) 35 | filtered_image = BytesIO() 36 | output_image.save(filtered_image, "PNG") 37 | filtered_image.seek(0) 38 | 39 | return StreamingResponse(filtered_image, media_type="image/png") 40 | 41 | if __name__ == "__main__": 42 | uvicorn.run(app) -------------------------------------------------------------------------------- /image/image_animation/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/image_animation/test.png -------------------------------------------------------------------------------- /image/image_animation/utils.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
12 | 13 | from io import BytesIO 14 | from typing_extensions import final 15 | import numpy as np 16 | import towhee 17 | from PIL import Image 18 | import cv2 19 | 20 | def read_image(file: BytesIO) -> np.ndarray: 21 | ''' 22 | Reads the image received in byte format 23 | and convert into PIL.Image format 24 | 25 | Args: 26 | - file (BytesIO): Receives a byte stream of file 27 | 28 | Returns: 29 | - (PIL.Image): A PIL Image object translated from byte stream 30 | 31 | ''' 32 | image = Image.open(BytesIO(file)) 33 | return image 34 | 35 | def translate_image(input_img: Image, model: str) -> np.ndarray: 36 | ''' 37 | Convert the input PIL based on provided model 38 | using anime gan towhee pipeline. 39 | 40 | Args: 41 | - input_img (PIL.Image): PIL.Image object 42 | - model (str): Model to be run on image 43 | 44 | Returns: 45 | - (PIL.Image): Returns a PIL Image object corresponding to translated image (RGB format) 46 | ''' 47 | output_img = towhee.ops.img2img_translation.animegan(model_name = model)(input_img) 48 | out = np.array(output_img) 49 | out_rgb = cv2.cvtColor(out, cv2.COLOR_BGR2RGB) 50 | output = Image.fromarray(out_rgb) 51 | 52 | return output 53 | -------------------------------------------------------------------------------- /image/image_deduplication/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/image_deduplication/Lenna.png -------------------------------------------------------------------------------- /image/image_deduplication/README.md: -------------------------------------------------------------------------------- 1 | # Image Deduplication 2 | 3 | Image deduplication is the process of finding exact or near-exact duplicates within a collection of images. Sometimes, some images are not exactly the same as other images, this is where the difficulty here lies - matching pure duplicates is a simple process, but matching images which are similar in the presence of changes in zoom, lighting, and noise is a much more challenging problem. 4 | 5 | This [notebook](image_deduplication.ipynb) shows you how to use Towhee's dc API to compare duplicates or near-exact duplicates within a few lines of code. -------------------------------------------------------------------------------- /image/image_deduplication/image_dedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/image_deduplication/image_dedup.png -------------------------------------------------------------------------------- /image/image_deduplication/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/image_deduplication/logo.png -------------------------------------------------------------------------------- /image/reverse_image_search/README.md: -------------------------------------------------------------------------------- 1 | # Reverse Image Search 2 | 3 | **Reverse image search** helps you search for similar or related images given an input image. Reverse image search is a [content-based image retrieval](https://en.wikipedia.org/wiki/Content-based_image_retrieval) (CBIR) query technique that involves providing the CBIR system with a query image that it will then base its search upon. 
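For a flavor of what the notebooks build, here is a minimal, hedged sketch of the query step with Towhee and Milvus. The model name, collection name, and Milvus address are placeholder assumptions; the notebooks walk through building the collection, searching it, and evaluating the results.

```python
from towhee import pipe, ops

# Sketch only: decode a query image, embed it, then search a pre-built Milvus collection.
p_search = (
    pipe.input('path')
        .map('path', 'img', ops.image_decode.cv2_rgb())
        .map('img', 'vec', ops.image_embedding.timm(model_name='resnet50'))
        .map('vec', 'result', ops.ann_search.milvus_client(
            host='127.0.0.1', port='19530',
            collection_name='reverse_image_search', limit=10))
        .output('path', 'result')
)

res = p_search('test/query.JPEG')  # placeholder query image path
```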
4 | 5 | 6 | 7 | This reverse image search example mainly consists of two notebooks, and I think everyone can learn the basic operations of Towhee and Milvus through the [**getting started notebook**](./1_build_image_search_engine.ipynb). And the [**deep dive notebook**](./2_deep_dive_image_search.ipynb) will show you how to improve performance and deploy the service. 8 | 9 | ## Learn from Notebook 10 | 11 | - [Getting started](1_build_image_search_engine.ipynb) 12 | 13 | In this notebook you will get the prerequisites, how to complete a simple image system search and visualize results, and how to evaluate system performance with selected metric. 14 | 15 | - [Deep Dive](./2_deep_dive_image_search.ipynb) 16 | 17 | In this notebook you will learn how to normalize embeddings, apply object detection, and reduce embedding dimension. 18 | There is also a section starting a simple online demo using Gradio. 19 | -------------------------------------------------------------------------------- /image/reverse_image_search/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/reverse_image_search/workflow.png -------------------------------------------------------------------------------- /image/text_image_search/2_deep_dive_text_image_search.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "c2218b66", 7 | "metadata": {}, 8 | "source": [ 9 | "# Deep Dive into Text-Image Search Engine with Towhee\n", 10 | "\n", 11 | "In the [previous tutorial](./1_build_text_image_search_engine.ipynb), we built and prototyped a proof-of-concept image search engine. Now, let's finetune it with our own image datasets, and deploy it with accleration service." 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "id": "ae6b056f", 18 | "metadata": {}, 19 | "source": [ 20 | "## Finetune Text-Image Search on Custom Dataset\n", 21 | "\n", 22 | "### Install Dependencies\n", 23 | "\n", 24 | "Firstly, we need to install dependencies such as towhee and opencv-python. And please make sure that you have started a [Milvus service](https://milvus.io/docs/install_standalone-docker.md). This notebook uses [milvus 2.2.10](https://milvus.io/docs/v2.2.x/install_standalone-docker.md) and [pymilvus 2.2.11](https://milvus.io/docs/release_notes.md#2210)." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "id": "bca1652c", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "! python -m pip -q install towhee opencv-python pymilvus==2.2.11" 35 | ] 36 | }, 37 | { 38 | "attachments": {}, 39 | "cell_type": "markdown", 40 | "id": "ba622fa5", 41 | "metadata": {}, 42 | "source": [ 43 | "### Prepare the Data\n", 44 | "\n", 45 | "For text-image search, we use CIFAR-10 dataset as an example to show how to finetune CLIP model for users' customized dataset. CIFAR-10 dataset contains 60,000 32x32 color images in 10 different classes. It is widely used as an image recognition benchmark for various computer vision models. 
In this example, we manually create the caption by creating the sentence with its corresponding label.\n" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 28, 51 | "id": "5152da61", 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Files already downloaded and verified\n", 59 | "Files already downloaded and verified\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "import torchvision\n", 65 | "import os\n", 66 | "import json\n", 67 | "\n", 68 | "\n", 69 | "root_dir = '/tmp/'\n", 70 | "train_dataset = torchvision.datasets.CIFAR10(root=root_dir, train=True, download=True)\n", 71 | "eval_dataset = torchvision.datasets.CIFAR10(root=root_dir, train=False, download=True)\n", 72 | "\n", 73 | "\n", 74 | "idx = 0\n", 75 | "def build_image_text_dataset(root, folder, dataset):\n", 76 | " results = []\n", 77 | " global idx\n", 78 | " labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']\n", 79 | " if not os.path.exists(os.path.join(root,folder)):\n", 80 | " os.mkdir(os.path.join(root,folder))\n", 81 | " for img, label_idx in dataset:\n", 82 | " item = {}\n", 83 | " imgname = \"IMG{:06d}.png\".format(idx)\n", 84 | " filename = os.path.join(root, folder, imgname)\n", 85 | " idx = idx + 1\n", 86 | " caption = 'this is a picture of {}.'.format(labels[label_idx])\n", 87 | " img.save(filename)\n", 88 | " item['caption_id'] = idx\n", 89 | " item['image_id'] = idx\n", 90 | " item['caption'] = caption\n", 91 | " item['image_path'] = filename\n", 92 | " results.append(item)\n", 93 | " return results\n", 94 | "\n", 95 | "def gen_caption_meta(root, name, meta):\n", 96 | " save_path = os.path.join(root, name+'.json')\n", 97 | " with open(save_path, 'w') as fw:\n", 98 | " fw.write(json.dumps(meta, indent=4))\n", 99 | "\n", 100 | "train_results = build_image_text_dataset(root_dir, 'train', train_dataset)\n", 101 | "gen_caption_meta(root_dir, 'train', train_results)\n", 102 | "\n", 103 | "eval_results = build_image_text_dataset(root_dir, 'eval', eval_dataset)\n", 104 | "gen_caption_meta(root_dir, 'eval', eval_results)\n" 105 | ] 106 | }, 107 | { 108 | "attachments": {}, 109 | "cell_type": "markdown", 110 | "id": "2b111adf", 111 | "metadata": {}, 112 | "source": [ 113 | "Now we have an image-text annotation of CIFAR-10\n", 114 | "\n", 115 | "|caption ID|image ID | caption | image | image path|\n", 116 | "|:--------|:-------- |:----------|:--------|:----------|\n", 117 | "| 0 | 0 | this is a picture of frog.| | /tmp/train/IMG000000.png |\n", 118 | "| 1 | 1 | this is a picture of truck. | | /tmp/train/IMG000001.png |\n", 119 | "| 2 | 2 | this is a picture of truck. 
| | /tmp/train/IMG000002.png |\n", 120 | "| 3 | 3 | this is a picture of deer.| | /tmp/train/IMG000003.png |\n", 121 | "| 4 | 4 | this is a picture of automobile.| | /tmp/train/IMG000004.png |" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "id": "3e08849b", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "import towhee\n", 132 | "from towhee import ops\n", 133 | "#step1\n", 134 | "#get the operator, modality has no effect to the training model, it is only for the inference branch selection.\n", 135 | "clip_op = ops.image_text_embedding.clip(model_name='clip_vit_base_patch16', modality='image').get_op()\n", 136 | "\n", 137 | "\n", 138 | "#step2\n", 139 | "#trainer configuration, theses parameters are huggingface-style standard training configuration.\n", 140 | "data_args = {\n", 141 | " 'dataset_name': None,\n", 142 | " 'dataset_config_name': None,\n", 143 | " 'train_file': '/tmp/train.json',\n", 144 | " 'validation_file': '/tmp/eval.json',\n", 145 | " 'max_seq_length': 77,\n", 146 | " 'data_dir': None,\n", 147 | " 'image_mean': [0.48145466, 0.4578275, 0.40821073],\n", 148 | " \"image_std\": [0.26862954, 0.26130258, 0.27577711]\n", 149 | "}\n", 150 | "\n", 151 | "training_args = {\n", 152 | " 'num_train_epochs': 32, # you can add epoch number to get a better metric.\n", 153 | " 'per_device_train_batch_size': 64,\n", 154 | " 'per_device_eval_batch_size': 64,\n", 155 | " 'do_train': True,\n", 156 | " 'do_eval': True,\n", 157 | " 'eval_steps':1,\n", 158 | " 'remove_unused_columns': False,\n", 159 | " 'dataloader_drop_last': True,\n", 160 | " 'output_dir': './output/train_clip_exp',\n", 161 | " 'overwrite_output_dir': True,\n", 162 | "}\n", 163 | "\n", 164 | "model_args = {\n", 165 | " 'freeze_vision_model': False,\n", 166 | " 'freeze_text_model': False,\n", 167 | " 'cache_dir': './cache'\n", 168 | "}\n", 169 | "\n", 170 | "#step3\n", 171 | "#train your model\n", 172 | "clip_op.train(data_args=data_args, training_args=training_args, model_args=model_args)\n" 173 | ] 174 | }, 175 | { 176 | "attachments": {}, 177 | "cell_type": "markdown", 178 | "id": "9994a9f4", 179 | "metadata": {}, 180 | "source": [ 181 | "CLIP operator uses standard Hugging Face [Transformers](https://github.com/huggingface/transformers) training procedure to finetune the model. The detail of training configuration can be found at [transformers doc](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments).\n", 182 | "When training procedure is finished, we can load the trained weights into the operator." 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "id": "6afd13cd", 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "ops.image_text_embedding.clip(model_name='clip_vit_base_patch16', modality='image', checkpoint_path='./output/train_clip_exp/checkpoint-5000/pytorch_model.bin')" 193 | ] 194 | }, 195 | { 196 | "attachments": {}, 197 | "cell_type": "markdown", 198 | "id": "414cfe20", 199 | "metadata": {}, 200 | "source": [ 201 | "## Making Our Text-Image Search Pipeline Production Ready\n", 202 | "\n", 203 | "The text-image pipeline now can be finetuned on customized dataset to get the gain from specific dataset. 
To put the text-image search engine into production, we also need to execute the whole pipeline in a highly-efficient way instead of original PyTorch execution.\n", 204 | "\n", 205 | "Towhee supports NVIDIA Triton Inference Server to improve performance for inferencing data for production-ready services. The supported model can be transfered to a Triton service just in a few lines.\n", 206 | "\n", 207 | "Operators can be packed into a Triton service for better inferencing performance. Some specific models of operator can be exported to ONNX models and achieve better acceleration (default is TorchScript).\n", 208 | "\n", 209 | "Before getting started, please make sure you have built `text_image_search` collection that uses the [L2 distance metric](https://milvus.io/docs/v2.0.x/metric.md#Euclidean-distance-L2) and an [IVF_FLAT index](https://milvus.io/docs/v2.0.x/index.md#IVF_FLAT) as the [previous tutorial](./1_build_text_image_search_engine.ipynb).\n", 210 | "\n", 211 | "### Check Operator \n", 212 | "Firstly, we need to check if the operator can be transfered to ONNX." 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 3, 218 | "id": "20bb99a0", 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "full model list: ['clip_vit_base_patch16', 'clip_vit_base_patch32', 'clip_vit_large_patch14', 'clip_vit_large_patch14_336']\n", 226 | "onnx model list: ['clip_vit_base_patch16', 'clip_vit_base_patch32', 'clip_vit_large_patch14', 'clip_vit_large_patch14_336']\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "from towhee import ops, pipe\n", 232 | "import numpy as np\n", 233 | "\n", 234 | "op = ops.image_text_embedding.clip(model_name='clip_vit_base_patch16', modality='image').get_op()\n", 235 | "full_list = op.supported_model_names()\n", 236 | "onnx_list = op.supported_model_names(format='onnx')\n", 237 | "\n", 238 | "print('full model list:', full_list)\n", 239 | "print('onnx model list:', onnx_list)" 240 | ] 241 | }, 242 | { 243 | "attachments": {}, 244 | "cell_type": "markdown", 245 | "id": "8e8f12cf", 246 | "metadata": {}, 247 | "source": [ 248 | "All candidate models of CLIP can be transfered to ONNX model for the Triton pipeline acceleration.\n", 249 | "\n", 250 | "### Build Docker Service" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "id": "81f1f0f3", 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "op = ops.image_text_embedding.clip(model_name='clip_vit_base_patch16', modality='text').get_op()\n", 261 | "\n", 262 | "#your host machine IP address, e.g. 
192.158.1.38\n", 263 | "ip_addr = '192.158.1.38'\n", 264 | "\n", 265 | "#make sure you have built Milvus collection successfully.\n", 266 | "p_search = (\n", 267 | " pipe.input('text')\n", 268 | " .map('text', 'vec', ops.image_text_embedding.clip(model_name='clip_vit_base_patch16', modality='text'), config={'device': 0})\n", 269 | " .map('vec', 'vec', lambda x: x / np.linalg.norm(x))\n", 270 | " .map('vec', ('search_res'), ops.ann_search.milvus_client(host=ip_addr, port='19530', limit=5, collection_name=\"text_image_search\", output_fields=['url']))\n", 271 | " .output('text','search_res')\n", 272 | ")\n", 273 | "\n", 274 | "towhee.build_docker_image(\n", 275 | " dc_pipeline=p_search,\n", 276 | " image_name='text_image_search:v1',\n", 277 | " cuda_version='11.7', # '117dev' for developer\n", 278 | " format_priority=['onnx'],\n", 279 | " inference_server='triton'\n", 280 | ")\n" 281 | ] 282 | }, 283 | { 284 | "attachments": {}, 285 | "cell_type": "markdown", 286 | "id": "226f16ba", 287 | "metadata": {}, 288 | "source": [ 289 | "After the docker image is built, the inferencing service and its associated model is resident in it. Start the service by running a docker container." 290 | ] 291 | }, 292 | { 293 | "attachments": {}, 294 | "cell_type": "markdown", 295 | "id": "a27395a2", 296 | "metadata": {}, 297 | "source": [ 298 | "```console\n", 299 | "docker run -td --gpus=all --shm-size=1g \\\n", 300 | " -p 8000:8000 -p 8001:8001 -p 8002:8002 \\\n", 301 | " text_image_search:v1 \\\n", 302 | " tritonserver --model-repository=/workspace/models\n", 303 | "```" 304 | ] 305 | }, 306 | { 307 | "attachments": {}, 308 | "cell_type": "markdown", 309 | "id": "4e3bafa9", 310 | "metadata": {}, 311 | "source": [ 312 | "### Inference with Triton Service\n", 313 | "Now we can use a client to visit the accelerated service." 
314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 26, 319 | "id": "c1be1ef7", 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "idx: 96, distance_score:1.35 , path: ./train/Bouvier_des_Flandres/n02106382_8906.JPEG\n", 327 | "idx: 506, distance_score:1.38 , path: ./train/Doberman/n02107142_4753.JPEG\n", 328 | "idx: 835, distance_score:1.38 , path: ./train/Afghan_hound/n02088094_3882.JPEG\n", 329 | "idx: 507, distance_score:1.39 , path: ./train/Doberman/n02107142_32921.JPEG\n", 330 | "idx: 832, distance_score:1.39 , path: ./train/Afghan_hound/n02088094_6565.JPEG\n" 331 | ] 332 | } 333 | ], 334 | "source": [ 335 | "from towhee import triton_client\n", 336 | "\n", 337 | "client = triton_client.Client(url='localhost:8000')\n", 338 | "\n", 339 | "data = \"a black dog.\"\n", 340 | "res = client(data)\n", 341 | "\n", 342 | "for idx, dis_score, path in res[0][1]:\n", 343 | " print('idx: {}, distance_score:{:.2f} , path: {}'.format(idx, dis_score, path))\n", 344 | "client.close()" 345 | ] 346 | } 347 | ], 348 | "metadata": { 349 | "kernelspec": { 350 | "display_name": "Python 3 (ipykernel)", 351 | "language": "python", 352 | "name": "python3" 353 | }, 354 | "language_info": { 355 | "codemirror_mode": { 356 | "name": "ipython", 357 | "version": 3 358 | }, 359 | "file_extension": ".py", 360 | "mimetype": "text/x-python", 361 | "name": "python", 362 | "nbconvert_exporter": "python", 363 | "pygments_lexer": "ipython3", 364 | "version": "3.8.12" 365 | } 366 | }, 367 | "nbformat": 4, 368 | "nbformat_minor": 5 369 | } 370 | -------------------------------------------------------------------------------- /image/text_image_search/README.md: -------------------------------------------------------------------------------- 1 | # Text Image Search 2 | 3 | Retrieval is the task of finding the most relevant object in a database given a query. In text-image search, we helps you search for matched or related images given an input text. 4 | 5 | This text-image search example mainly consists of two notebooks, and I think everyone can learn the basic operations of Towhee and Milvus through the [**getting started notebook**](./1_build_text_image_search_engine.ipynb). 6 | 7 | ## Learn from Notebook 8 | 9 | - [Getting started](./1_build_text_image_search_engine.ipynb) 10 | 11 | In this notebook you will get the prerequisites, how to complete a simple text-image search and visualize results,and show you how to start the FastAPI service. 12 | 13 | - [Deep dive](./2_deep_dive_text_image_search.ipynb) 14 | 15 | In this notebook you will learn how to finetune text-image search on your own dataset and how to deploy a accelerated service for text-image search application. 16 | 17 | - [Chinese version](./3_build_chinese_image_search_engine.ipynb) 18 | 19 | In this notebook you will learn how to how to build custom towhee ops and complete a simple chinese text-image search and image-image search task. 
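As a quick taste of the ingestion side covered in the getting-started notebook, the sketch below embeds images with the CLIP image branch, normalizes the vectors, and inserts them into Milvus. The collection name, host, and the insert operator's exact arguments are assumptions; the notebook has the authoritative version.

```python
from towhee import pipe, ops
import numpy as np

# Hedged sketch: embed images with CLIP (image modality) and insert them into Milvus.
p_insert = (
    pipe.input('url')
        .map('url', 'img', ops.image_decode.cv2_rgb())
        .map('img', 'vec', ops.image_text_embedding.clip(
            model_name='clip_vit_base_patch16', modality='image'))
        .map('vec', 'vec', lambda x: x / np.linalg.norm(x))
        .map(('url', 'vec'), 'mr', ops.ann_insert.milvus_client(
            host='127.0.0.1', port='19530', collection_name='text_image_search'))
        .output('mr')
)

p_insert('train/IMG000000.png')  # any local image path
```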
20 | -------------------------------------------------------------------------------- /image/text_image_search/teddy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/text_image_search/teddy.png -------------------------------------------------------------------------------- /image/text_image_search/train/IMG000000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/text_image_search/train/IMG000000.png -------------------------------------------------------------------------------- /image/text_image_search/train/IMG000001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/text_image_search/train/IMG000001.png -------------------------------------------------------------------------------- /image/text_image_search/train/IMG000002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/text_image_search/train/IMG000002.png -------------------------------------------------------------------------------- /image/text_image_search/train/IMG000003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/text_image_search/train/IMG000003.png -------------------------------------------------------------------------------- /image/text_image_search/train/IMG000004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/text_image_search/train/IMG000004.png -------------------------------------------------------------------------------- /image/text_image_search/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/text_image_search/workflow.png -------------------------------------------------------------------------------- /image/visualization/README.md: -------------------------------------------------------------------------------- 1 | # Visualization with Towhee 2 | 3 | There are some tutorials to visualization and learn around the principle in image search. For example, we can visualize the attention mechanism of embedding models with heatmaps, or use Feder to visualize the embedding approximate nearest neighbors search (ANNS) process. 4 | 5 | ## Learn from Notebook 6 | 7 | - [Visualize Models](./under_the_hood_embedding_models.ipynb) 8 | 9 | With some visualization tools in towhee, this tutorial show some examples for model interpretability. Towhee provides state-of-the-art interpretability and visualization algorithms, including attribution-based algorithms, embedding visualization algorithms, attention visualization algorithms, to provide researchers and developers with an easy way to understand features and which features are contributing to a model’s output. 
10 | 11 | - [Visualize ANNS](./under_the_hood_anns_index.ipynb) 12 | 13 | This notebook will visualize the IVF_FLAT and HNSW index when searching images with [feder](https://github.com/zilliztech/feder), then compare whether to normalize the vector and whether to add object detection, and finally visualize the cross-model retrieval process, which we can use text to search for images. 14 | 15 | > More information about Feder you can learn from "[Visualize Your Approximate Nearest Neighbor Search with Feder](https://zilliz.com/blog/Visualize-Your-Approximate-Nearest-Neighbor-Search-with-Feder)" and "[Visualize Reverse Image Search with Feder](https://zilliz.com/blog/Visualize-Reverse-Image-Search-with-Feder)" 16 | -------------------------------------------------------------------------------- /image/visualization/cat_and_dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/visualization/cat_and_dog.png -------------------------------------------------------------------------------- /image/visualization/feder_towhee.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/visualization/feder_towhee.png -------------------------------------------------------------------------------- /image/visualization/towhee1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/visualization/towhee1.png -------------------------------------------------------------------------------- /image/visualization/towhee2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/visualization/towhee2.png -------------------------------------------------------------------------------- /image/visualization/towhee3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image/visualization/towhee3.png -------------------------------------------------------------------------------- /image/yolo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "458b4cd9", 6 | "metadata": {}, 7 | "source": [ 8 | "# Optimization of the Object Detection Operator with YOLO" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "1a34f387", 14 | "metadata": {}, 15 | "source": [ 16 | "[Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) is a cutting-edge, state-of-the-art (SOTA) model that builds upon the success of previous YOLO versions and introduces new features and improvements to further boost performance and flexibility. YOLOv8 is designed to be fast, accurate, and easy to use, making it an excellent choice for a wide range of object detection and tracking, instance segmentation, image classification and pose estimation tasks." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "53dc444c", 22 | "metadata": {}, 23 | "source": [ 24 | "## Run with YOLO\n", 25 | "\n", 26 | "First, let's run YOLO prediction code, which will be very simple according to the [tutorial](https://docs.ultralytics.com/modes/predict/)." 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "4b5b2ce6", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "from ultralytics import YOLO\n", 37 | "model = YOLO(\"yolov8n.pt\") \n", 38 | "result = model(\"https://ultralytics.com/images/bus.jpg\")[0]" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "id": "e01490e3", 44 | "metadata": {}, 45 | "source": [ 46 | "Then, we can get the information we want based on the predicted `result`, such as `boxes`, `classes` and `scores`." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "id": "5e163003", 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "boxes = [list(map(int, xyxy)) for xyxy in result.boxes.xyxy]\n", 57 | "classes = [result.names[int(i)] for i in result.boxes.cls]\n", 58 | "scores = result.boxes.conf.tolist()\n", 59 | "print(boxes, classes, scores)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "id": "3c6874eb", 65 | "metadata": {}, 66 | "source": [ 67 | "## Develop yolo operator\n", 68 | "\n", 69 | "According to [yolov5 operator](https://towhee.io/object-detection/yolov5), we can run the pipeline for image object detection in the following code:" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "id": "5ac79368", 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "from towhee import pipe, ops, DataCollection\n", 80 | "\n", 81 | "p = (\n", 82 | " pipe.input('path')\n", 83 | " .map('path', 'img', ops.image_decode.cv2_rgb())\n", 84 | " .map('img', ('box', 'class', 'score'), ops.object_detection.yolov5())\n", 85 | " .map(('img', 'box'), 'object', ops.image_crop(clamp=True))\n", 86 | " .output('img', 'object', 'class')\n", 87 | ")" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "c86c59d6", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "DataCollection(p(\"https://ultralytics.com/images/bus.jpg\")).show()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "id": "678b3d3a", 103 | "metadata": {}, 104 | "source": [ 105 | "Next, we can optimize the yolo arithmetic so as to support the yolov8 model, so we can develop yolov8 operator based on the yolov5 operator, for example, we develop the YOLOv8 class and develop the `__init__` and `__call__` function, so as to support YOLOv8 model." 
106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "id": "e4e06fc8", 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "import numpy\n", 116 | "from towhee import register\n", 117 | "from towhee.operator import NNOperator\n", 118 | "from ultralytics import YOLO\n", 119 | "\n", 120 | "\n", 121 | "@register(name='yolov8')\n", 122 | "class YOLOv8(NNOperator):\n", 123 | " def __init__(self, model=\"yolov8n.pt\"):\n", 124 | " super().__init__()\n", 125 | " self._model = YOLO(model)\n", 126 | "\n", 127 | " def __call__(self, img: numpy.ndarray):\n", 128 | " results = self._model(img)[0]\n", 129 | " boxes = [list(map(int, xyxy)) for xyxy in result.boxes.xyxy]\n", 130 | " classes = [result.names[int(i)] for i in result.boxes.cls]\n", 131 | " scores = result.boxes.conf.tolist()\n", 132 | " return boxes, classes, scores\n" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "id": "c9301040", 138 | "metadata": {}, 139 | "source": [ 140 | "If you compare the YOLOv8 class with the YOLOv5 class, you will see that they are very similar, the difference is the usage of the models.\n", 141 | "\n", 142 | "Then, we can also take the names `yolov8` operator to the pipeline, a similar pipeline as above, but changing the operator to YOLOv8." 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "id": "a336ab8a", 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "from towhee import pipe, ops, DataCollection\n", 153 | "\n", 154 | "p = (\n", 155 | " pipe.input('path')\n", 156 | " .map('path', 'img', ops.image_decode.cv2_rgb())\n", 157 | " .map('img', ('box', 'class', 'score'), ops.yolov8(model=\"yolov8n.pt\"))\n", 158 | " .map(('img', 'box'), 'object', ops.image_crop(clamp=True))\n", 159 | " .output('img', 'object', 'class')\n", 160 | ")" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "id": "442ebf2d", 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "DataCollection(p(\"https://ultralytics.com/images/bus.jpg\")).show()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "id": "54ac8f7a", 176 | "metadata": {}, 177 | "source": [ 178 | "Now we have completed the development of the YOLOv8 operator, as well as adding the operator to the pipeline, and the YOLOv8 model detects one more object, the stop sign, compared to the YOLOv5 model, so it is clear that YOLOv8 is a more complete detection than YOLOv5.\n", 179 | "\n", 180 | "We can also develop the `__init__` and `__call__` methods according to how models such as YOLOv6 are used, for example, to enable different models." 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "id": "2943fa74", 186 | "metadata": {}, 187 | "source": [ 188 | "## Set for training\n", 189 | "\n", 190 | "We can also train the YOLOv8 operator, according to the method of [YOLOv8 training](https://github.com/ultralytics/ultralytics/blob/dce4efce48a05e028e6ec430045431c242e52484/docs/yolov5/tutorials/train_custom_data.md), first of all, you have to manually create a \"dataset\" directory in the current directory, and then you can use COCO dataset to train the YOLOv8 model, or you can replace it with your own dataset." 
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "id": "9694e1e1", 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "import towhee\n", 201 | "\n", 202 | "op = towhee.ops.yolov8().get_op()" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "id": "340c963c", 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "op._model.train(data=\"coco128.yaml\", epochs=3)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "id": "f9004cb5", 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [] 222 | } 223 | ], 224 | "metadata": { 225 | "kernelspec": { 226 | "display_name": "Python 3 (ipykernel)", 227 | "language": "python", 228 | "name": "python3" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.10.12" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 5 245 | } 246 | -------------------------------------------------------------------------------- /image_generation/how_to_generate_image_given_text.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ef79dc4bd46859ab", 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "source": [ 10 | "# How to generate image using Towhee\n", 11 | "\n", 12 | "\n", 13 | "## Introduction\n", 14 | "\n", 15 | "We will use a Towhee operator [text2image/stable-diffusion](https://towhee.io/text2image/stable-diffusion) to generate image given text.\n", 16 | "\n", 17 | "### What is Image Generation?\n", 18 | "\n", 19 | "The field of generative image synthesis involves creating images using algorithms and models, often based on machine learning techniques. It combines computer vision, machine learning, and computational creativity to __generate new and visually appealing images that can be used in various applications.__\n", 20 | "\n", 21 | " There are also a large number of models that can be used during image generation:__GANs,VAEs,diffusion models__ and other efficient ones.They have led to impressive results in image synthesis, including generating photorealistic images, creating novel artwork, and even transferring styles between images.\n", 22 | " \n", 23 | "![image](./img.png)\n", 24 | "\n", 25 | "This little cute corgi picture is actually a product of image generation. \n", 26 | "\n", 27 | "### What is Towhee?\n", 28 | "\n", 29 | "Towhee is an open-source embedding framework that includes adequate data processing algorithms and neural network models. __With Towhee, you can easily process unstructured data (such as pictures, videos, audio, long texts and other unstructured data) and complete the conversion of raw data to vectors__.It is also an open algorithm and model exchange community where developers from all over the world can freely share models and pipelines to promote technology and efficiency.\n", 30 | "\n", 31 | "You can get to more useful information about this magic framework by clicking here [towhee](https://towhee.io/)." 
32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "1b6fd386", 37 | "metadata": {}, 38 | "source": [ 39 | "## Preparation\n", 40 | "\n", 41 | "You are able to have more knowledge about how to generate images with towhee through the following materials.\n", 42 | "\n", 43 | "### Install Packages\n", 44 | "First,make sure that you have installed the required python packages with proper versions." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "65bb93d9", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "! python -m pip install towhee gradio" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "id": "4f4c6d65", 60 | "metadata": {}, 61 | "source": [ 62 | "## Getting Started\n", 63 | "\n", 64 | "Just get your image-generation journey started with the following towhee pipeline!" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "id": "5c3e6b6c", 71 | "metadata": { 72 | "ExecuteTime": { 73 | "end_time": "2023-09-27T10:54:42.972208300Z", 74 | "start_time": "2023-09-27T10:10:28.280300Z" 75 | } 76 | }, 77 | "outputs": [ 78 | { 79 | "name": "stderr", 80 | "output_type": "stream", 81 | "text": [ 82 | "vae\\diffusion_pytorch_model.safetensors not found\n" 83 | ] 84 | }, 85 | { 86 | "data": { 87 | "text/plain": "Loading pipeline components...: 0%| | 0/6 [00:00", 171 | "text/html": "
" 172 | }, 173 | "metadata": {}, 174 | "output_type": "display_data" 175 | }, 176 | { 177 | "data": { 178 | "text/plain": "" 179 | }, 180 | "execution_count": 5, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "import gradio\n", 187 | "from towhee import pipe\n", 188 | "\n", 189 | "interface = gradio.Interface(pipe,\n", 190 | " inputs=gradio.Textbox(label='prompt',info='type anything you want'),\n", 191 | " outputs=gradio.Image(type='numpy')\n", 192 | " )\n", 193 | "interface.launch(share=True)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 5, 199 | "id": "6b4c0b3b6edf923e", 200 | "metadata": { 201 | "collapsed": false, 202 | "ExecuteTime": { 203 | "end_time": "2023-09-27T10:54:51.205016500Z", 204 | "start_time": "2023-09-27T10:54:51.157942800Z" 205 | } 206 | }, 207 | "outputs": [], 208 | "source": [] 209 | } 210 | ], 211 | "metadata": { 212 | "kernelspec": { 213 | "display_name": "Python 3", 214 | "language": "python", 215 | "name": "python3" 216 | }, 217 | "language_info": { 218 | "codemirror_mode": { 219 | "name": "ipython", 220 | "version": 2 221 | }, 222 | "file_extension": ".py", 223 | "mimetype": "text/x-python", 224 | "name": "python", 225 | "nbconvert_exporter": "python", 226 | "pygments_lexer": "ipython2", 227 | "version": "2.7.6" 228 | } 229 | }, 230 | "nbformat": 4, 231 | "nbformat_minor": 5 232 | } 233 | -------------------------------------------------------------------------------- /image_generation/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/image_generation/img.png -------------------------------------------------------------------------------- /medical/molecular_search/README.md: -------------------------------------------------------------------------------- 1 | # Molecular Search 2 | 3 | Drug discovery, as the source of medical innovation, is an important part of new medicine research and development. Drug discovery is implemented by target selection and confirmation. In order to discover available compounds in the fragment space from billion-scale compound libraries, chemical fingerprint is usually retrieved for substructure search and similarity search. 4 | 5 | 6 | 7 | This example will show you how to find the similar, sub or super molecular formula. It mainly consists of two notebooks, I think everyone can learn the basic operations of Molecular Search System through the [**getting started notebook**](./1_build_molecular_search_engine.ipynb). And the [**deep dive notebook**](./2_deep_dive_molecular_search.ipynb) will show you how to deploy the showcase. 8 | 9 | ## Learn from Notebook 10 | 11 | - [Getting started](1_build_molecular_search_engine.ipynb) 12 | 13 | In this notebook you will get the prerequisites, how to complete a simple molecular search system and visualize the results. 14 | 15 | - [Deep Dive](./2_deep_dive_molecular_search.ipynb) 16 | 17 | In this notebook you will learn how to search sub and super structure, and finally show you how to start the Radio service. 
18 | -------------------------------------------------------------------------------- /nlp/question_answering/1_build_question_answering_engine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "e410b6c4", 6 | "metadata": {}, 7 | "source": [ 8 | "# Build a Qusetion Answering Engine in Minutes" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "9bed6f24", 14 | "metadata": {}, 15 | "source": [ 16 | "This notebook illustrates how to build a question answering engine from scratch using [Milvus](https://milvus.io/) and [Towhee](https://towhee.io/). Milvus is the most advanced open-source vector database built for AI applications and supports nearest neighbor embedding search across tens of millions of entries, and Towhee is a framework that provides ETL for unstructured data using SoTA machine learning models.\n", 17 | "\n", 18 | "We will go through question answering procedures and evaluate performance. Moreover, we managed to make the core functionality as simple as almost 10 lines of code with Towhee, so that you can start hacking your own question answering engine." 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "4883e577", 24 | "metadata": {}, 25 | "source": [ 26 | "## Preparations" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "id": "49110b91", 32 | "metadata": {}, 33 | "source": [ 34 | "### Install Dependencies" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "0117995a", 40 | "metadata": {}, 41 | "source": [ 42 | "First we need to install dependencies such as towhee, towhee.models and gradio." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 41, 48 | "id": "c9ba3850", 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "\n", 56 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0\u001b[0m\n", 57 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "! python -m pip install -q towhee towhee.models gradio" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "id": "a90db0c5", 68 | "metadata": {}, 69 | "source": [ 70 | "### Prepare the Data" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "id": "d1eceb58", 76 | "metadata": {}, 77 | "source": [ 78 | "There is a subset of the [InsuranceQA Corpus](https://github.com/shuzi/insuranceQA) (1000 pairs of questions and answers) used in this demo, everyone can download on [Github](https://github.com/towhee-io/examples/releases/download/data/question_answer.csv)." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 1, 84 | "id": "d1436a9c", 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 92 | " Dload Upload Total Spent Left Speed\n", 93 | " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", 94 | "100 595k 100 595k 0 0 286k 0 0:00:02 0:00:02 --:--:-- 666k\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "! 
curl -L https://github.com/towhee-io/examples/releases/download/data/question_answer.csv -O" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "c4abdc0a", 105 | "metadata": {}, 106 | "source": [ 107 | "**question_answer.csv**: a file containing the questions and their corresponding answers.\n", 108 | "\n", 109 | "Let's take a quick look:" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 2, 115 | "id": "d652efea", 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/html": [ 121 | 
\n", 122 | "\n", 135 | "\n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | "
idquestionanswer
00Is Disability Insurance Required By Law?Not generally. There are five states that requ...
11Can Creditors Take Life Insurance After ...If the person who passed away was the one with...
22Does Travelers Insurance Have Renters Ins...One of the insurance carriers I represent is T...
33Can I Drive A New Car Home Without Ins...Most auto dealers will not let you drive the c...
44Is The Cash Surrender Value Of Life Ins...Cash surrender value comes only with Whole Lif...
\n", 177 | "
" 178 | ], 179 | "text/plain": [ 180 | " id question \\\n", 181 | "0 0 Is Disability Insurance Required By Law? \n", 182 | "1 1 Can Creditors Take Life Insurance After ... \n", 183 | "2 2 Does Travelers Insurance Have Renters Ins... \n", 184 | "3 3 Can I Drive A New Car Home Without Ins... \n", 185 | "4 4 Is The Cash Surrender Value Of Life Ins... \n", 186 | "\n", 187 | " answer \n", 188 | "0 Not generally. There are five states that requ... \n", 189 | "1 If the person who passed away was the one with... \n", 190 | "2 One of the insurance carriers I represent is T... \n", 191 | "3 Most auto dealers will not let you drive the c... \n", 192 | "4 Cash surrender value comes only with Whole Lif... " 193 | ] 194 | }, 195 | "execution_count": 2, 196 | "metadata": {}, 197 | "output_type": "execute_result" 198 | } 199 | ], 200 | "source": [ 201 | "import pandas as pd\n", 202 | "\n", 203 | "df = pd.read_csv('question_answer.csv')\n", 204 | "df.head()" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "id": "309bfb43", 210 | "metadata": {}, 211 | "source": [ 212 | "To use the dataset to get answers, let's first define the dictionary:\n", 213 | "\n", 214 | "- `id_answer`: a dictionary of id and corresponding answer" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 3, 220 | "id": "4d98b309", 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "id_answer = df.set_index('id')['answer'].to_dict()" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "id": "1c5a0858", 230 | "metadata": {}, 231 | "source": [ 232 | "### Create Milvus Collection" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "id": "efb06a01", 238 | "metadata": {}, 239 | "source": [ 240 | "Before getting started, please make sure that you have started a [Milvus service](https://milvus.io/docs/install_standalone-docker.md). This notebook uses [milvus 2.2.10](https://milvus.io/docs/v2.2.x/install_standalone-docker.md) and [pymilvus 2.2.11](https://milvus.io/docs/release_notes.md#2210)." 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "id": "8048bf6c", 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "! python -m pip install -q pymilvus==2.2.11" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "id": "87ba2b23", 256 | "metadata": {}, 257 | "source": [ 258 | "Next to define the function `create_milvus_collection` to create collection in Milvus that uses the [L2 distance metric](https://milvus.io/docs/metric.md#Euclidean-distance-L2) and an [IVF_FLAT index](https://milvus.io/docs/index.md#IVF_FLAT)." 
259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 4, 264 | "id": "22c19982", 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility\n", 269 | "\n", 270 | "connections.connect(host='127.0.0.1', port='19530')\n", 271 | "\n", 272 | "def create_milvus_collection(collection_name, dim):\n", 273 | " if utility.has_collection(collection_name):\n", 274 | " utility.drop_collection(collection_name)\n", 275 | " \n", 276 | " fields = [\n", 277 | " FieldSchema(name='id', dtype=DataType.VARCHAR, descrition='ids', max_length=500, is_primary=True, auto_id=False),\n", 278 | " FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, descrition='embedding vectors', dim=dim)\n", 279 | " ]\n", 280 | " schema = CollectionSchema(fields=fields, description='reverse image search')\n", 281 | " collection = Collection(name=collection_name, schema=schema)\n", 282 | "\n", 283 | " # create IVF_FLAT index for collection.\n", 284 | " index_params = {\n", 285 | " 'metric_type':'L2',\n", 286 | " 'index_type':\"IVF_FLAT\",\n", 287 | " 'params':{\"nlist\":2048}\n", 288 | " }\n", 289 | " collection.create_index(field_name=\"embedding\", index_params=index_params)\n", 290 | " return collection\n", 291 | "\n", 292 | "collection = create_milvus_collection('question_answer', 768)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "id": "9724ba28", 298 | "metadata": {}, 299 | "source": [ 300 | "## Question Answering Engine" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "id": "01e1ac7e", 306 | "metadata": {}, 307 | "source": [ 308 | "In this section, we will show how to build our question answering engine using Milvus and Towhee. The basic idea behind question answering is to use Towhee to generate embedding from the question dataset and compare the input question with the embedding stored in Milvus.\n", 309 | "\n", 310 | "[Towhee](https://towhee.io/) is a machine learning framework that allows the creation of data processing pipelines, and it also provides predefined operators for implementing insert and query operations in Milvus.\n", 311 | "\n", 312 | "" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "id": "4c0188bf", 318 | "metadata": {}, 319 | "source": [ 320 | "### Load question embedding into Milvus" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "id": "0a654fdc", 326 | "metadata": {}, 327 | "source": [ 328 | "We first generate embedding from question text with [dpr](https://towhee.io/text-embedding/dpr) operator and insert the embedding into Milvus. Towhee provides a [method-chaining style API](https://towhee.readthedocs.io/en/main/index.html) so that users can assemble a data processing pipeline with operators." 
329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 5, 334 | "id": "13b7beea", 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "name": "stdout", 339 | "output_type": "stream", 340 | "text": [ 341 | "CPU times: user 2min 37s, sys: 3min 59s, total: 6min 37s\n", 342 | "Wall time: 1min 27s\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "%%time\n", 348 | "from towhee import pipe, ops\n", 349 | "import numpy as np\n", 350 | "from towhee.datacollection import DataCollection\n", 351 | "\n", 352 | "insert_pipe = (\n", 353 | "    pipe.input('id', 'question', 'answer')\n", 354 | "        .map('question', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))\n", 355 | "        .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))\n", 356 | "        .map(('id', 'vec'), 'insert_status', ops.ann_insert.milvus_client(host='127.0.0.1', port='19530', collection_name='question_answer'))\n", 357 | "        .output()\n", 358 | ")\n", 359 | "\n", 360 | "import csv\n", 361 | "with open('question_answer.csv', encoding='utf-8') as f:\n", 362 | "    reader = csv.reader(f)\n", 363 | "    next(reader)\n", 364 | "    for row in reader:\n", 365 | "        insert_pipe(*row)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 6, 371 | "id": "1adbb2e1", 372 | "metadata": {}, 373 | "outputs": [ 374 | { 375 | "name": "stdout", 376 | "output_type": "stream", 377 | "text": [ 378 | "Total number of inserted data is 1000.\n" 379 | ] 380 | } 381 | ], 382 | "source": [ 383 | "print('Total number of inserted data is {}.'.format(collection.num_entities))" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "id": "deb269f4", 389 | "metadata": {}, 390 | "source": [ 391 | "#### Explanation of Data Processing Pipeline\n", 392 | "\n", 393 | "Here is a detailed explanation of each line of the code:\n", 394 | "\n", 395 | "`pipe.input('id', 'question', 'answer')`: Get three inputs, namely the question's id, the question's text, and the question's answer;\n", 396 | "\n", 397 | "`map('question', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))`: Use the `facebook/dpr-ctx_encoder-single-nq-base` model to generate the question embedding vector with the [dpr operator](https://towhee.io/text-embedding/dpr) from the Towhee hub;\n", 398 | "\n", 399 | "`map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))`: normalize the embedding vector;\n", 400 | "\n", 401 | "`map(('id', 'vec'), 'insert_status', ops.ann_insert.milvus_client(host='127.0.0.1', port='19530', collection_name='question_answer'))`: insert the question embedding vector into Milvus;" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "id": "b35657d0", 407 | "metadata": {}, 408 | "source": [ 409 | "### Ask Question with Milvus and Towhee" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "id": "cd02adfc", 415 | "metadata": {}, 416 | "source": [ 417 | "Now that embeddings for the question dataset have been inserted into Milvus, we can ask questions with Milvus and Towhee. Again, we use Towhee to load the input question, compute an embedding, and use it as a query in Milvus. Because Milvus only outputs IDs and distance values, we provide the `id_answer` dictionary to look up the answers by ID and display them." 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 7, 423 | "id": "95913f05", 424 | "metadata": {}, 425 | "outputs": [ 426 | { 427 | "data": { 428 | "text/html": [ 429 | 
question answer
Is Disability Insurance Required By Law?
Not generally. There are five states that require most all employers carry short term disability insurance on their employees. T...
" 430 | ], 431 | "text/plain": [ 432 | "" 433 | ] 434 | }, 435 | "metadata": {}, 436 | "output_type": "display_data" 437 | }, 438 | { 439 | "name": "stdout", 440 | "output_type": "stream", 441 | "text": [ 442 | "CPU times: user 1.12 s, sys: 375 ms, total: 1.49 s\n", 443 | "Wall time: 16.7 s\n" 444 | ] 445 | } 446 | ], 447 | "source": [ 448 | "%%time\n", 449 | "collection.load()\n", 450 | "ans_pipe = (\n", 451 | " pipe.input('question')\n", 452 | " .map('question', 'vec', ops.text_embedding.dpr(model_name=\"facebook/dpr-ctx_encoder-single-nq-base\"))\n", 453 | " .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))\n", 454 | " .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1', port='19530', collection_name='question_answer', limit=1))\n", 455 | " .map('res', 'answer', lambda x: [id_answer[int(i[0])] for i in x])\n", 456 | " .output('question', 'answer')\n", 457 | ")\n", 458 | "\n", 459 | "\n", 460 | "ans = ans_pipe('Is Disability Insurance Required By Law?')\n", 461 | "ans = DataCollection(ans)\n", 462 | "ans.show()" 463 | ] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "id": "bfb05a79", 468 | "metadata": {}, 469 | "source": [ 470 | "Then we can get the answer about 'Is Disability Insurance Required By Law?'." 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 8, 476 | "id": "cb1a8f96", 477 | "metadata": {}, 478 | "outputs": [ 479 | { 480 | "data": { 481 | "text/plain": [ 482 | "['Not generally. There are five states that require most all employers carry short term disability insurance on their employees. These states are: California, Hawaii, New Jersey, New York, and Rhode Island. Besides this mandatory short term disability law, there is no other legislative imperative for someone to purchase or be covered by disability insurance.']" 483 | ] 484 | }, 485 | "execution_count": 8, 486 | "metadata": {}, 487 | "output_type": "execute_result" 488 | } 489 | ], 490 | "source": [ 491 | "ans[0]['answer']" 492 | ] 493 | }, 494 | { 495 | "cell_type": "markdown", 496 | "id": "01bef722", 497 | "metadata": {}, 498 | "source": [ 499 | "## Release a Showcase" 500 | ] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "id": "c71cace8", 505 | "metadata": {}, 506 | "source": [ 507 | "We've done an excellent job on the core functionality of our question answering engine. Now it's time to build a showcase with interface. [Gradio](https://gradio.app/) is a great tool for building demos. 
With Gradio, we simply need to wrap the data processing pipeline via a `chat` function:" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 9, 513 | "id": "65d42114", 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [ 517 | "import towhee\n", 518 | "def chat(message, history):\n", 519 | " history = history or []\n", 520 | " ans_pipe = (\n", 521 | " pipe.input('question')\n", 522 | " .map('question', 'vec', ops.text_embedding.dpr(model_name=\"facebook/dpr-ctx_encoder-single-nq-base\"))\n", 523 | " .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))\n", 524 | " .map('vec', 'res', ops.ann_search.milvus_client(host='127.0.0.1', port='19530', collection_name='question_answer', limit=1))\n", 525 | " .map('res', 'answer', lambda x: [id_answer[int(i[0])] for i in x])\n", 526 | " .output('question', 'answer')\n", 527 | " )\n", 528 | "\n", 529 | " response = ans_pipe(message).get()[1][0]\n", 530 | " history.append((message, response))\n", 531 | " return history, history" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 10, 537 | "id": "065523a7", 538 | "metadata": {}, 539 | "outputs": [ 540 | { 541 | "name": "stdout", 542 | "output_type": "stream", 543 | "text": [ 544 | "Running on local URL: http://127.0.0.1:7860\n", 545 | "Running on public URL: https://7efbf90b-a281-48f9.gradio.live\n", 546 | "\n", 547 | "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n" 548 | ] 549 | }, 550 | { 551 | "data": { 552 | "text/html": [ 553 | "
" 554 | ], 555 | "text/plain": [ 556 | "" 557 | ] 558 | }, 559 | "metadata": {}, 560 | "output_type": "display_data" 561 | }, 562 | { 563 | "data": { 564 | "text/plain": [] 565 | }, 566 | "execution_count": 10, 567 | "metadata": {}, 568 | "output_type": "execute_result" 569 | } 570 | ], 571 | "source": [ 572 | "import gradio\n", 573 | "\n", 574 | "collection.load()\n", 575 | "chatbot = gradio.Chatbot(color_map=(\"green\", \"gray\"))\n", 576 | "interface = gradio.Interface(\n", 577 | " chat,\n", 578 | " [\"text\", \"state\"],\n", 579 | " [chatbot, \"state\"],\n", 580 | " allow_screenshot=False,\n", 581 | " allow_flagging=\"never\",\n", 582 | ")\n", 583 | "interface.launch(inline=True, share=True)" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": null, 589 | "id": "23806967", 590 | "metadata": {}, 591 | "outputs": [], 592 | "source": [] 593 | } 594 | ], 595 | "metadata": { 596 | "kernelspec": { 597 | "display_name": "Python 3 (ipykernel)", 598 | "language": "python", 599 | "name": "python3" 600 | }, 601 | "language_info": { 602 | "codemirror_mode": { 603 | "name": "ipython", 604 | "version": 3 605 | }, 606 | "file_extension": ".py", 607 | "mimetype": "text/x-python", 608 | "name": "python", 609 | "nbconvert_exporter": "python", 610 | "pygments_lexer": "ipython3", 611 | "version": "3.8.12" 612 | }, 613 | "vscode": { 614 | "interpreter": { 615 | "hash": "f7dd10cdbe9a9c71f7e71741efd428241b5f9fa0fecdd29ae07a5706cd5ff8a2" 616 | } 617 | } 618 | }, 619 | "nbformat": 4, 620 | "nbformat_minor": 5 621 | } 622 | -------------------------------------------------------------------------------- /nlp/question_answering/README.md: -------------------------------------------------------------------------------- 1 | # Question Answering 2 | 3 | Question answering is a classic problem in the field of natural language processing. While it sounds like an easy problem to solve, there is still a lot of research going on to improve the techniques that we have now. A large part of solving questions is finding questions that are similar to the one being asked. 4 | 5 | 6 | 7 | This example will show you how to find the similar asked question and get the answer. It mainly consists of two notebooks, hoping everyone can learn basic operations of Question Answering System through the [**getting started notebook**](./1_build_question_answering_engine.ipynb). 8 | 9 | ## Learn from Notebook 10 | 11 | - [Getting started](1_build_question_answering_engine.ipynb) 12 | 13 | In this notebook you will get the prerequisites, how to complete a simple question answering system and release a showcase. 14 | -------------------------------------------------------------------------------- /nlp/question_answering/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/nlp/question_answering/workflow.png -------------------------------------------------------------------------------- /nlp/text_search/README.md: -------------------------------------------------------------------------------- 1 | # Text Search 2 | 3 | Search for text in the text dataset, and it will find the most similar results to the search text across all data. Searching for text is different from traditional keyword searches, which search for semantically relevant content. 4 | 5 | 6 | 7 | This example will show you how to search article in Medium data through the [**notebook**](search_article_in_medium.ipynb). 
8 | -------------------------------------------------------------------------------- /pipeline/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/1.jpg -------------------------------------------------------------------------------- /pipeline/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/2.jpg -------------------------------------------------------------------------------- /pipeline/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/3.jpg -------------------------------------------------------------------------------- /pipeline/README.md: -------------------------------------------------------------------------------- 1 | # Getting Started with Towhee 2 | 3 | Before running other Bootcamps, please take a few minutes to learn about Towhee. After going through these notebooks, you will be more familiar with the data processing pipeline in Towhee: 4 | 5 | - [Introduction to Pipeline](getting_started_with_pipeline.ipynb) 6 | 7 | It will show you how to use `Pipeline` to process data, customize Towhee operators, and organize your own workflow. 8 | 9 | -------------------------------------------------------------------------------- /pipeline/broken.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/broken.jpg -------------------------------------------------------------------------------- /pipeline/concat_node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/concat_node.png -------------------------------------------------------------------------------- /pipeline/filter_node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/filter_node.png -------------------------------------------------------------------------------- /pipeline/flat_map_node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/flat_map_node.png -------------------------------------------------------------------------------- /pipeline/map_node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/map_node.png -------------------------------------------------------------------------------- /pipeline/time_window_node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/time_window_node.png -------------------------------------------------------------------------------- /pipeline/window_all.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/window_all.png -------------------------------------------------------------------------------- /pipeline/window_node.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/pipeline/window_node.png -------------------------------------------------------------------------------- /video/deepfake_detection/1_deepfake_detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "68a8e57f", 6 | "metadata": {}, 7 | "source": [ 8 | "# Build a Deepfake Detection System \n", 9 | "\n", 10 | "This notebook illustrates how to build a video deepfake detection system from scratch using [Towhee](https://towhee.io/). A deepfake detection system predicts, for each video, the probability of it being a fake one. This tutorial will use examples from [Deepfake Detection Challenge](https://www.kaggle.com/competitions/deepfake-detection-challenge).\n", 11 | "\n", 12 | "Using the sample data of different videos, we will build a basic deepfake detection system within a few lines of code and check the performance using Towhee. At the end, we use [Gradio](https://gradio.app/) to create a showcase that can be played with." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "id": "6ddf7f23", 18 | "metadata": {}, 19 | "source": [ 20 | "## Preparation\n", 21 | "\n", 22 | "### Install packages\n", 23 | "\n", 24 | "Make sure you have installed required python packages:\n", 25 | "\n", 26 | "| package |\n", 27 | "| -- |\n", 28 | "| towhee |\n", 29 | "| towhee.models |\n", 30 | "| dlib |\n", 31 | "| facenet-pytorch |\n", 32 | "| albumentations |\n", 33 | "| timm |\n", 34 | "| pytorch_toolbelt |\n", 35 | "| tensorboardx |\n", 36 | "| tqdm |\n", 37 | "| gradio |" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 1, 43 | "id": "96393479", 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "! python -m pip install -q towhee towhee.models dlib facenet-pytorch albumentations timm pytorch_toolbelt tensorboardx tqdm gradio" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "id": "8355528c", 53 | "metadata": {}, 54 | "source": [ 55 | "### Prepare data\n", 56 | "\n", 57 | "This tutorial will use a small amount of data extracted from the test dataset of the [Deepfake Detection Challenge](https://www.kaggle.com/competitions/deepfake-detection-challenge/overview). You can download the subset from [Github](https://github.com/towhee-io/examples/releases/download/data/deepfake_video.zip). This tutorial will just use 2 videos under `test` as examples.\n", 58 | "\n", 59 | "The data is organized as follows:\n", 60 | "- **test:** 2 videos from Deepfake Detection Challenge test dataset\n", 61 | "\n", 62 | "Let's take a quick look:" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 8, 68 | "id": "ce2ad8ee", 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n", 76 | "                                 Dload  Upload   Total   Spent    Left  Speed\n", 77 | "  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0\n", 78 | "100 14.3M  100 14.3M    0     0  1592k      0  0:00:09  0:00:09 --:--:-- 3052k\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "! 
curl -L https://github.com/towhee-io/examples/releases/download/data/deepfake_video.zip -O\n", 84 | "! unzip -q -o deepfake_video.zip" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "db28cef1", 90 | "metadata": {}, 91 | "source": [ 92 | "## Build System\n", 93 | "\n", 94 | "Now we are ready to build a deepfake detection system using the sample data. We will use the [Combined efficientnet](https://arxiv.org/abs/2107.02612) model to predict the probability of each input video being a fake one. With proper [Towhee operators](https://towhee.io/operators), you don't need to go through video preprocessing & model details." 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 27, 100 | "id": "6047691f", 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/html": [ 106 | "
scores
0.99004173
" 107 | ], 108 | "text/plain": [ 109 | "" 110 | ] 111 | }, 112 | "metadata": {}, 113 | "output_type": "display_data" 114 | } 115 | ], 116 | "source": [ 117 | "from towhee import ops, pipe, DataCollection\n", 118 | "\n", 119 | "p = (\n", 120 | " pipe.input('path')\n", 121 | " .map('path', 'scores', ops.towhee.deepfake())\n", 122 | " .output('scores')\n", 123 | ")\n", 124 | "\n", 125 | "DataCollection(p('./deepfake_video/test/aagfhgtpmv.mp4')).show()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "id": "8980823c", 131 | "metadata": {}, 132 | "source": [ 133 | "#### Pipeline Explanation\n", 134 | "\n", 135 | "Here are some details for each line of the assemble pipeline:\n", 136 | "\n", 137 | "- `towhee.deepfake()`: a Towhee operator applying pretrained models to predict the probabilite a video being a fake one. The higher the score, the higher the probability of it being a fake video.[learn more](https://towhee.io/towhee/deepfake)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "id": "77ffadef", 143 | "metadata": {}, 144 | "source": [ 145 | "## Release a Showcase\n", 146 | "\n", 147 | "We've learnt how to build a deepfake detection system. Now it's time to add some interface and release a showcase. We use `deepfake_detection_function` to wrap the pipeline as a [Gradio](https://gradio.app/) application." 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 28, 153 | "id": "a82a7101", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "def deepfake_detection_function(path):\n", 158 | " return p(path).get_dict()['scores']" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 29, 164 | "id": "2097730d", 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "Running on local URL: http://127.0.0.1:7863\n", 172 | "Running on public URL: https://0fb299f8-a2b2-4257.gradio.live\n", 173 | "\n", 174 | "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n" 175 | ] 176 | }, 177 | { 178 | "data": { 179 | "text/html": [ 180 | "
" 181 | ], 182 | "text/plain": [ 183 | "" 184 | ] 185 | }, 186 | "metadata": {}, 187 | "output_type": "display_data" 188 | }, 189 | { 190 | "data": { 191 | "text/plain": [] 192 | }, 193 | "execution_count": 29, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | }, 197 | { 198 | "name": "stdout", 199 | "output_type": "stream", 200 | "text": [ 201 | "loading state dict /home/xuyu/.towhee/operators/towhee/deepfake/main/weights/final_999_DeepFakeClassifier_tf_efficientnet_b7_ns_0_23\n", 202 | "loading state dict /home/xuyu/.towhee/operators/towhee/deepfake/main/weights/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_31\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "import gradio\n", 208 | "\n", 209 | "interface = gradio.Interface(deepfake_detection_function, \n", 210 | " inputs = gradio.Video(source='upload'),\n", 211 | " outputs = [gradio.Textbox(lines=1)]\n", 212 | " )\n", 213 | "\n", 214 | "interface.launch(inline=True, share=True)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "id": "c3fe8e85", 220 | "metadata": {}, 221 | "source": [ 222 | "deepfake_detection_demo" 223 | ] 224 | } 225 | ], 226 | "metadata": { 227 | "kernelspec": { 228 | "display_name": "Python 3 (ipykernel)", 229 | "language": "python", 230 | "name": "python3" 231 | }, 232 | "language_info": { 233 | "codemirror_mode": { 234 | "name": "ipython", 235 | "version": 3 236 | }, 237 | "file_extension": ".py", 238 | "mimetype": "text/x-python", 239 | "name": "python", 240 | "nbconvert_exporter": "python", 241 | "pygments_lexer": "ipython3", 242 | "version": "3.8.12" 243 | }, 244 | "vscode": { 245 | "interpreter": { 246 | "hash": "ee8f9d005f921b11e37646322d569d83ab1bb8b2f1f9e1244f064a47f10136b5" 247 | } 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 5 252 | } 253 | -------------------------------------------------------------------------------- /video/deepfake_detection/README.md: -------------------------------------------------------------------------------- 1 | # Deepfake Detection 2 | 3 | Deepfake techniques, which present realistic AI-generated videos of people doing and saying fictional things, have the potential to have a significant impact on how people determine the legitimacy of information presented online. 4 | 5 | This deepfake detection bootcamp mainly includes one notebook for fake video detection. You can learn about deepfake detection solutions and basic concepts of [Towhee](https://towhee.io/) from these notebook tutorials. 6 | 7 | ## Learn from Notebook 8 | 9 | - [Deepfake Detection](./1_deepfake_detection.ipynb) 10 | 11 | In this notebook you will build a basic deepfake detection system with pretrained models. At the end, you are able to build up a playable deepfake detection system with 5 lines of code. 
12 | -------------------------------------------------------------------------------- /video/deepfake_detection/deepfake.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/deepfake_detection/deepfake.jpg -------------------------------------------------------------------------------- /video/reverse_video_search/2_deep_dive_reverse_video_search.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8347485f", 6 | "metadata": {}, 7 | "source": [ 8 | "# Deep Dive Reverse Video Search\n", 9 | "\n", 10 | "In the [previous tutorial](./1_reverse_video_search_engine.ipynb), we've learnt how to build a reverse video search engine. Now let's make the solution more feasible in production." 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "569571ec", 16 | "metadata": {}, 17 | "source": [ 18 | "## Preparation\n", 19 | "\n", 20 | "Let's recall preparation steps first:\n", 21 | "1. Install packages\n", 22 | "2. Prepare data\n", 23 | "3. Start milvus\n", 24 | "\n", 25 | "### Install packages\n", 26 | "\n", 27 | "Make sure you have installed required python packages:\n", 28 | "\n", 29 | "| package |\n", 30 | "| -- |\n", 31 | "| towhee |\n", 32 | "| towhee.models |\n", 33 | "| pillow |\n", 34 | "| ipython |\n", 35 | "| fastapi |" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "id": "d2d8e3e7", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "! python -m pip install -q towhee towhee.models" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "id": "11ef6b1a", 51 | "metadata": {}, 52 | "source": [ 53 | "### Prepare data\n", 54 | "\n", 55 | "This tutorial will use a small data extracted from [Kinetics400](https://www.deepmind.com/open-source/kinetics). You can download the subset from [Github](https://github.com/towhee-io/examples/releases/download/data/reverse_video_search.zip). \n", 56 | "\n", 57 | "The data is organized as follows:\n", 58 | "- **train:** candidate videos, 20 classes, 10 videos per class (200 in total)\n", 59 | "- **test:** query videos, same 20 classes as train data, 1 video per class (20 in total)\n", 60 | "- **reverse_video_search.csv:** a csv file containing an ***id***, ***path***, and ***label*** for each video in train data" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 2, 66 | "id": "54568b1a", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "! curl -L https://github.com/towhee-io/examples/releases/download/data/reverse_video_search.zip -O\n", 71 | "! 
unzip -q -o reverse_video_search.zip" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "id": "2171f2e7", 77 | "metadata": {}, 78 | "source": [ 79 | "For later steps to easier get videos & measure results, we build some helpful functions in advance:\n", 80 | "- **ground_truth:** get ground-truth video ids for the query video by its path" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 1, 86 | "id": "dd1b0ef0", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "import pandas as pd\n", 91 | "\n", 92 | "df = pd.read_csv('./reverse_video_search.csv')\n", 93 | "\n", 94 | "id_video = df.set_index('id')['path'].to_dict()\n", 95 | "label_ids = {}\n", 96 | "for label in set(df['label']):\n", 97 | " label_ids[label] = list(df[df['label']==label].id)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "id": "b3e98f62", 103 | "metadata": {}, 104 | "source": [ 105 | "### Start Milvus\n", 106 | "\n", 107 | "Before getting started with the engine, we also need to get ready with Milvus. Please make sure that you have started a [Milvus service](https://milvus.io/docs/install_standalone-docker.md). This notebook uses [milvus 2.2.10](https://milvus.io/docs/v2.2.x/install_standalone-docker.md) and [pymilvus 2.2.11](https://milvus.io/docs/release_notes.md#2210)." 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "id": "123fc72f", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "! python -m pip install -q pymilvus==2.2.11" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "id": "7665543e", 123 | "metadata": {}, 124 | "source": [ 125 | "Here we prepare a function to work with a Milvus collection with the following parameters:\n", 126 | "- [L2 distance metric](https://milvus.io/docs/metric.md#Euclidean-distance-L2)\n", 127 | "- [IVF_FLAT index](https://milvus.io/docs/index.md#IVF_FLAT)." 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 2, 133 | "id": "f4fbffa1", 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility\n", 138 | "\n", 139 | "connections.connect(host='127.0.0.1', port='19530')\n", 140 | "\n", 141 | "def create_milvus_collection(collection_name, dim):\n", 142 | " if utility.has_collection(collection_name):\n", 143 | " utility.drop_collection(collection_name)\n", 144 | " \n", 145 | " fields = [\n", 146 | " FieldSchema(name='id', dtype=DataType.INT64, descrition='ids', is_primary=True, auto_id=False),\n", 147 | " FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, descrition='embedding vectors', dim=dim)\n", 148 | " ]\n", 149 | " schema = CollectionSchema(fields=fields, description='deep dive reverse video search')\n", 150 | " collection = Collection(name=collection_name, schema=schema)\n", 151 | "\n", 152 | " # create IVF_FLAT index for collection.\n", 153 | " index_params = {\n", 154 | " 'metric_type':'L2',\n", 155 | " 'index_type':\"IVF_FLAT\",\n", 156 | " 'params':{\"nlist\": 400}\n", 157 | " }\n", 158 | " collection.create_index(field_name=\"embedding\", index_params=index_params)\n", 159 | " return collection" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "750d8e66", 165 | "metadata": {}, 166 | "source": [ 167 | "### Build Engine\n", 168 | "\n", 169 | "Now we are ready to build a reverse-video-search engine. 
Here we show an engine built with the [`TimeSformer` model](https://towhee.io/action-classification/timesformer) and its performance, to make comparisons later." 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 3, 175 | "id": "86790dac", 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "def read_csv(csv_file):\n", 180 | "    import csv\n", 181 | "    with open(csv_file, 'r', encoding='utf-8-sig') as f:\n", 182 | "        data = csv.DictReader(f)\n", 183 | "        for line in data:\n", 184 | "            yield line['id'], line['path'], line['label']\n", 185 | "\n", 186 | "def ground_truth(path):\n", 187 | "    label = path.split('/')[-2]\n", 188 | "    return label_ids[label]\n", 189 | "\n", 190 | "def mean_hit_ratio(actual, predicted):\n", 191 | "    ratios = []\n", 192 | "    for act, pre in zip(actual, predicted):\n", 193 | "        hit_num = len(set(act) & set(pre))\n", 194 | "        ratios.append(hit_num / len(act))\n", 195 | "    return sum(ratios) / len(ratios)\n", 196 | "\n", 197 | "def mean_average_precision(actual, predicted):\n", 198 | "    aps = []\n", 199 | "    for act, pre in zip(actual, predicted):\n", 200 | "        precisions = []\n", 201 | "        hit = 0\n", 202 | "        for idx, i in enumerate(pre):\n", 203 | "            if i in act:\n", 204 | "                hit += 1\n", 205 | "                precisions.append(hit / (idx + 1))\n", 206 | "        aps.append(sum(precisions) / len(precisions))\n", 207 | "    \n", 208 | "    return sum(aps) / len(aps)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 6, 214 | "id": "d015dfaf", 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "data": { 219 | "text/html": [ 220 | "
mHR mAP
0.715 0.7723293650793651
" 221 | ], 222 | "text/plain": [ 223 | "" 224 | ] 225 | }, 226 | "metadata": {}, 227 | "output_type": "display_data" 228 | } 229 | ], 230 | "source": [ 231 | "import glob\n", 232 | "from towhee import pipe, ops\n", 233 | "from towhee.datacollection import DataCollection\n", 234 | "\n", 235 | "collection = create_milvus_collection('timesformer', 768)\n", 236 | "\n", 237 | "insert_pipe = (\n", 238 | " pipe.input('csv_path')\n", 239 | " .flat_map('csv_path', ('id', 'path', 'label'), read_csv)\n", 240 | " .map('id', 'id', lambda x: int(x))\n", 241 | " .map('path', 'frames', ops.video_decode.ffmpeg(sample_type='uniform_temporal_subsample', args={'num_samples': 8}))\n", 242 | " .map('frames', ('labels', 'scores', 'features'), ops.action_classification.timesformer(skip_preprocess=True))\n", 243 | " .map('features', 'features', ops.towhee.np_normalize())\n", 244 | " .map(('id', 'features'), 'insert_res', ops.ann_insert.milvus_client(host='127.0.0.1', port='19530', collection_name='timesformer'))\n", 245 | " .output()\n", 246 | ")\n", 247 | "\n", 248 | "insert_pipe('reverse_video_search.csv')\n", 249 | "\n", 250 | "collection.load()\n", 251 | "eval_pipe = (\n", 252 | " pipe.input('path')\n", 253 | " .flat_map('path', 'path', lambda x: glob.glob(x))\n", 254 | " .map('path', 'frames', ops.video_decode.ffmpeg(sample_type='uniform_temporal_subsample', args={'num_samples': 8}))\n", 255 | " .map('frames', ('labels', 'scores', 'features'), ops.action_classification.timesformer(skip_preprocess=True))\n", 256 | " .map('features', 'features', ops.towhee.np_normalize())\n", 257 | " .map('features', 'result', ops.ann_search.milvus_client(host='127.0.0.1', port='19530', collection_name='timesformer', limit=10)) \n", 258 | " .map('result', 'predict', lambda x: [i[0] for i in x])\n", 259 | " .map('path', 'ground_truth', ground_truth)\n", 260 | " .window_all(('ground_truth', 'predict'), 'mHR', mean_hit_ratio)\n", 261 | " .window_all(('ground_truth', 'predict'), 'mAP', mean_average_precision)\n", 262 | " .output('mHR', 'mAP')\n", 263 | ")\n", 264 | "\n", 265 | "res = DataCollection(eval_pipe('./test/*/*.mp4'))\n", 266 | "res.show()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "id": "e9d78601", 272 | "metadata": {}, 273 | "source": [ 274 | "## Dimensionality Reduction\n", 275 | "\n", 276 | "In production, memory consumption is always a major concern, which can by relieved by minimizing the embedding dimension. Random projection is a dimensionality reduction method for a set vectors in Euclidean space. Since this method is fast and requires no training, we'll try this technique and compare performance with TimeSformer model:" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "id": "2474daba", 282 | "metadata": {}, 283 | "source": [ 284 | "First let's get a quick look at the engine performance without dimension reduction. The embedding dimension is 768." 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "id": "dca23c3e", 290 | "metadata": {}, 291 | "source": [ 292 | "To reduce dimension, we can apply a projection matrix in proper size to each original embedding. We can just add an operator `.map('features', 'features', lambda x: np.dot(x, projection_matrix))` right after an video embedding is generated. Let's see how's the engine performance with embedding dimension down to 128." 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 9, 298 | "id": "7343f885", 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/html": [ 304 | "
mHR mAP
0.61 0.6778511904761905
" 305 | ], 306 | "text/plain": [ 307 | "" 308 | ] 309 | }, 310 | "metadata": {}, 311 | "output_type": "display_data" 312 | } 313 | ], 314 | "source": [ 315 | "import numpy as np\n", 316 | "\n", 317 | "projection_matrix = np.random.normal(scale=1.0, size=(768, 128))\n", 318 | "\n", 319 | "collection = create_milvus_collection('timesformer_128', 128)\n", 320 | "\n", 321 | "insert_pipe = (\n", 322 | " pipe.input('csv_path')\n", 323 | " .flat_map('csv_path', ('id', 'path', 'label'), read_csv)\n", 324 | " .map('id', 'id', lambda x: int(x))\n", 325 | " .map('path', 'frames', ops.video_decode.ffmpeg(sample_type='uniform_temporal_subsample', args={'num_samples': 8}))\n", 326 | " .map('frames', ('labels', 'scores', 'features'), ops.action_classification.timesformer(skip_preprocess=True))\n", 327 | " .map('features', 'features', lambda x: np.dot(x, projection_matrix))\n", 328 | " .map('features', 'features', ops.towhee.np_normalize())\n", 329 | " .map(('id', 'features'), 'insert_res', ops.ann_insert.milvus_client(host='127.0.0.1', port='19530', collection_name='timesformer_128'))\n", 330 | " .output()\n", 331 | ")\n", 332 | "\n", 333 | "insert_pipe('reverse_video_search.csv')\n", 334 | "\n", 335 | "collection.load()\n", 336 | "eval_pipe = (\n", 337 | " pipe.input('path')\n", 338 | " .flat_map('path', 'path', lambda x: glob.glob(x))\n", 339 | " .map('path', 'frames', ops.video_decode.ffmpeg(sample_type='uniform_temporal_subsample', args={'num_samples': 8}))\n", 340 | " .map('frames', ('labels', 'scores', 'features'), ops.action_classification.timesformer(skip_preprocess=True))\n", 341 | " .map('features', 'features', lambda x: np.dot(x, projection_matrix))\n", 342 | " .map('features', 'features', ops.towhee.np_normalize())\n", 343 | " .map('features', 'result', ops.ann_search.milvus_client(host='127.0.0.1', port='19530', collection_name='timesformer_128', limit=10)) \n", 344 | " .map('result', 'predict', lambda x: [i[0] for i in x])\n", 345 | " .map('path', 'ground_truth', ground_truth)\n", 346 | " .window_all(('ground_truth', 'predict'), 'mHR', mean_hit_ratio)\n", 347 | " .window_all(('ground_truth', 'predict'), 'mAP', mean_average_precision)\n", 348 | " .output('mHR', 'mAP')\n", 349 | ")\n", 350 | "\n", 351 | "res = DataCollection(eval_pipe('./test/*/*.mp4'))\n", 352 | "res.show()" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "id": "9c9c999b", 358 | "metadata": {}, 359 | "source": [ 360 | "It's surprising that the performance is not affected a lot. Both mHR and mAP descrease by about 0.1 while the embedding size are reduced by 6 times (dimension from 768 to 128)." 
361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "id": "0a71defe", 366 | "metadata": {}, 367 | "source": [] 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "Python 3 (ipykernel)", 373 | "language": "python", 374 | "name": "python3" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 | "version": 3 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython3", 386 | "version": "3.8.12" 387 | }, 388 | "vscode": { 389 | "interpreter": { 390 | "hash": "f7dd10cdbe9a9c71f7e71741efd428241b5f9fa0fecdd29ae07a5706cd5ff8a2" 391 | } 392 | } 393 | }, 394 | "nbformat": 4, 395 | "nbformat_minor": 5 396 | } 397 | -------------------------------------------------------------------------------- /video/reverse_video_search/README.md: -------------------------------------------------------------------------------- 1 | # Reverse Video Search 2 | 3 | Reverse video search is similar to [reverse image search](https://github.com/towhee-io/examples/tree/main/image/reverse_image_search). In simple words, it takes a video as input and searches for similar videos. Since video-related tasks are harder to tackle, video models normally do not achieve scores as high as other types of models. However, demand for AI applications in video is increasing. Reverse video search can effectively discover related videos and improve other applications. 4 | 5 | This reverse video search example mainly consists of two notebooks: you can learn the basic operations of Towhee and Milvus through the [**getting started notebook**](./1_reverse_video_search_engine.ipynb), and the [**deep dive notebook**](./2_deep_dive_reverse_video_search.ipynb) will make the engine more feasible in production. 6 | 7 | ## Learn from Notebook 8 | 9 | - [Getting started](./1_reverse_video_search_engine.ipynb) 10 | 11 | In this notebook you will get prerequisites, build and use a basic reverse video search system, visualize sample results, make simple optimizations, and measure the system with performance metrics. 12 | 13 | - [Deep Dive](./2_deep_dive_reverse_video_search.ipynb) 14 | 15 | In this notebook, you will learn how to reduce resource usage, speed up the system, and ensure stability.
16 | -------------------------------------------------------------------------------- /video/reverse_video_search/reverse_video_search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/reverse_video_search/reverse_video_search.png -------------------------------------------------------------------------------- /video/reverse_video_search/tmp/Ou1w86qEr58.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/reverse_video_search/tmp/Ou1w86qEr58.gif -------------------------------------------------------------------------------- /video/reverse_video_search/tmp/V7DUq0JJneY.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/reverse_video_search/tmp/V7DUq0JJneY.gif -------------------------------------------------------------------------------- /video/reverse_video_search/tmp/bTCznQiu0hc.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/reverse_video_search/tmp/bTCznQiu0hc.gif -------------------------------------------------------------------------------- /video/reverse_video_search/tmp/ty4UQlowp0c.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/reverse_video_search/tmp/ty4UQlowp0c.gif -------------------------------------------------------------------------------- /video/text_video_retrieval/README.md: -------------------------------------------------------------------------------- 1 | # Text-Video Retrieval 2 | 3 | The objective of video retrieval is as follows: given a text query and a pool of candidate videos, select the video which corresponds to the text query. Typically, the videos are returned as a ranked list of candidates and scored via document retrieval metrics (a minimal ranking sketch is included at the end of this README). 4 | 5 | This text-video retrieval example mainly consists of two notebooks: you can learn the basic operations of Towhee and Milvus through the [**getting started notebook**](./1_text_video_retrieval_engine.ipynb), and the [**deep dive notebook**](./2_deep_dive_text_video_retrieval.ipynb) will make the engine more feasible in production. 6 | 7 | ## Learn from Notebook 8 | 9 | - [Getting started](./1_text_video_retrieval_engine.ipynb) 10 | 11 | In this notebook, you will get the prerequisites and build a basic text-video retrieval engine with Towhee and Milvus.
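
If the ranking step sounds abstract, the minimal sketch below (plain NumPy with randomly generated stand-in embeddings, not the actual model outputs used in the notebook) shows what selecting the best-matching video for a text query boils down to once both sides are embedded into the same space:

```python
import numpy as np

# Stand-ins for real embeddings: one text query vector and five candidate video vectors.
text_emb = np.random.rand(512)
video_embs = np.random.rand(5, 512)

# L2-normalize so that the dot product equals cosine similarity.
text_emb = text_emb / np.linalg.norm(text_emb)
video_embs = video_embs / np.linalg.norm(video_embs, axis=1, keepdims=True)

# Score every candidate and rank video indices from best to worst match.
scores = video_embs @ text_emb
ranked = np.argsort(scores)[::-1]
print(ranked, scores[ranked])
```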
12 | -------------------------------------------------------------------------------- /video/text_video_retrieval/tmp_gifs/video7365.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/text_video_retrieval/tmp_gifs/video7365.gif -------------------------------------------------------------------------------- /video/text_video_retrieval/tmp_gifs/video7579.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/text_video_retrieval/tmp_gifs/video7579.gif -------------------------------------------------------------------------------- /video/text_video_retrieval/tmp_gifs/video7725.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/text_video_retrieval/tmp_gifs/video7725.gif -------------------------------------------------------------------------------- /video/text_video_retrieval/tmp_gifs/video8068.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/text_video_retrieval/tmp_gifs/video8068.gif -------------------------------------------------------------------------------- /video/text_video_retrieval/tmp_gifs/video9258.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/text_video_retrieval/tmp_gifs/video9258.gif -------------------------------------------------------------------------------- /video/video_copy_detection/README.md: -------------------------------------------------------------------------------- 1 | # Video Deduplication 2 | 3 | Video Deduplication, also known as Video Copy Detection or Video Identification by Fingerprinting, means that given a query video, you need to find or retrieval the videos with the same content with query video. 4 | 5 | Due to the popularity of Internet-based video sharing services, the volume of video content on the Web has reached unprecedented scales. Besides copyright protection, a video copy detection system is important in applications like video classification, tracking, filtering and recommendation. 6 | 7 | Generally speaking, video deduplication tasks can be divided into two categories according to the retrieval level: one is video-level deduplication, and the other is segment-level deduplication . 8 | 9 | - Video-level deduplication is a method for situations with high repetition. It finds duplicate videos by comparing the similarity between the embeddings of the whole video. Since only one embedding is extracted from a video, this method works faster. But the limitation of this method is also obvious: it is not good for detecting similar videos of different lengths. For example, the first quarter of video A and video B are exactly the same, but their embeddings may not be similar. In this case, it is obviously impossible to detect infringing content. 10 | 11 | - Segment-level deduplication detects the specific start and end times of repeated segments, which can handle complex clipping and insertion of video segments as well as situations where the video lengths are not equal. 
It does so by comparing the similarity between video frames. Obviously, we need to use this method in the actual task of mass video duplication checking. Of course, the speed of this method will be slower than the one of video level. 12 | 13 | ## Learn from Notebook 14 | 15 | - [Getting started with a video-level example](video_level/video_deduplication_at_video_level.ipynb) 16 | 17 | In this notebook you will get prerequisites, build and use a basic Video Deduplication system based on video level, visualize sample results, and measure the system with performance metrics. 18 | 19 | - [Build a practical segment-level example](segment_level/video_deduplication_at_segment_level.ipynb) 20 | 21 | In this notebook you will get prerequisites, build a more practical Video Deduplication system with greater robustness, more engineered solution, and finer-grained results. 22 | -------------------------------------------------------------------------------- /video/video_copy_detection/segment_level/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/segment_level/example.png -------------------------------------------------------------------------------- /video/video_copy_detection/segment_level/tmp_gifs/d62ce5becff14a0c9c7dab5eea6647dc_the_wandering_earth_Wu_Jing_became_teenage_idol-1qf4y1G7gM.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/segment_level/tmp_gifs/d62ce5becff14a0c9c7dab5eea6647dc_the_wandering_earth_Wu_Jing_became_teenage_idol-1qf4y1G7gM.gif -------------------------------------------------------------------------------- /video/video_copy_detection/segment_level/tmp_gifs/e5dc80abd7a24b47accde190c9fdbcdc-The_Wandering_Earth_is_on_CCTV-1db411m7f8.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/segment_level/tmp_gifs/e5dc80abd7a24b47accde190c9fdbcdc-The_Wandering_Earth_is_on_CCTV-1db411m7f8.gif -------------------------------------------------------------------------------- /video/video_copy_detection/segment_level/tmp_gifs/ef65e0f662e646a88a13b6eddb640e48-News_Broadcast_on_Wandering_Earth-1xb411U7uE.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/segment_level/tmp_gifs/ef65e0f662e646a88a13b6eddb640e48-News_Broadcast_on_Wandering_Earth-1xb411U7uE.gif -------------------------------------------------------------------------------- /video/video_copy_detection/segment_level/tmp_gifs2/0640bd5d43d1499c962e275be6b804ef-Does_MaDongmei_live_here-1e64y1y799.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/segment_level/tmp_gifs2/0640bd5d43d1499c962e275be6b804ef-Does_MaDongmei_live_here-1e64y1y799.gif -------------------------------------------------------------------------------- /video/video_copy_detection/segment_level/tmp_gifs2/ad244c924f31461a9d809c77ae251ac1-the_classic_dialogue_what_is_Ma_Mei-1y7411n7y1.gif: 
/video/video_copy_detection/segment_level/tmp_gifs2/ad244c924f31461a9d809c77ae251ac1-the_classic_dialogue_what_is_Ma_Mei-1y7411n7y1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/segment_level/tmp_gifs2/ad244c924f31461a9d809c77ae251ac1-the_classic_dialogue_what_is_Ma_Mei-1y7411n7y1.gif -------------------------------------------------------------------------------- /video/video_copy_detection/segment_level/video_decopy_insert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/segment_level/video_decopy_insert.png -------------------------------------------------------------------------------- /video/video_copy_detection/segment_level/video_decopy_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/segment_level/video_decopy_query.png -------------------------------------------------------------------------------- /video/video_copy_detection/video_level/tmp_gifs/2bdf8029b38735a992a56e32cfc81466eea81286.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/video_level/tmp_gifs/2bdf8029b38735a992a56e32cfc81466eea81286.gif -------------------------------------------------------------------------------- /video/video_copy_detection/video_level/tmp_gifs/b61905d41276ccf2af59d4985158f8b1ce1d4990.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/video_level/tmp_gifs/b61905d41276ccf2af59d4985158f8b1ce1d4990.gif -------------------------------------------------------------------------------- /video/video_copy_detection/video_level/tmp_gifs/e2adc784b83446ae775f698b9d17c9fd392b2f75.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_copy_detection/video_level/tmp_gifs/e2adc784b83446ae775f698b9d17c9fd392b2f75.gif -------------------------------------------------------------------------------- /video/video_tagging/README.md: -------------------------------------------------------------------------------- 1 | # Video Tagging 2 | 3 | Video tagging means adding proper tags to videos. Tags can describe a video from many different aspects: for example, object detection, action recognition, and place identification can all contribute to video tagging. 4 | 5 | This video tagging bootcamp includes one notebook for each type of tagging. You can learn about video tagging solutions and the basic concepts of [Towhee](https://towhee.io/) from these notebook tutorials. 6 | 7 | ## Learn from Notebook 8 | 9 | - [Action Classification](./action_classification.ipynb) 10 | 11 | In this notebook you will build a basic video action classification system with sample data (covering 20 human activities), visualize the predicted labels, and measure the system with performance metrics. Moreover, you can try optimization methods for better accuracy and efficiency. By the end, you will be able to build a working video classification system with 5 lines of code; a minimal sketch of such a pipeline follows below.
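To get a feel for what those few lines can look like before opening the notebook, here is a sketch of an action classification pipeline built with Towhee's `pipe`/`ops` API. The operator names (`video_decode.ffmpeg`, `action_classification.pytorchvideo`), their parameters, and the `archery.mp4` sample path are illustrative assumptions and may differ from the notebook's exact code.

```python
# A minimal Towhee-style action classification sketch (assumed operator names
# and parameters; the notebook may configure them differently).
from towhee import pipe, ops

action_classification = (
    pipe.input('path')
        # Decode the video and uniformly sample a fixed number of frames.
        .map('path', 'frames', ops.video_decode.ffmpeg(
            sample_type='uniform_temporal_subsample', args={'num_samples': 32}))
        # Run a pretrained video model (e.g. X3D-M) and keep the top predictions.
        .map('frames', ('labels', 'scores', 'features'),
             ops.action_classification.pytorchvideo(model_name='x3d_m', topk=5))
        .output('path', 'labels', 'scores')
)

# 'archery.mp4' is a placeholder; point it at any local video clip.
result = action_classification('archery.mp4')
print(result.get())  # [path, predicted labels, confidence scores]
```

The pipeline returns a data queue whose rows hold the requested output columns; in the notebook, the same predictions are compared against ground-truth labels to compute the performance metrics mentioned above.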
12 | -------------------------------------------------------------------------------- /video/video_tagging/action_classification_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/towhee-io/examples/5fdf32f338131fdcead6f0c67e9823321c4a8d04/video/video_tagging/action_classification_demo.png --------------------------------------------------------------------------------