├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── README_CN.md ├── SETTINGS.yaml ├── app.py ├── examples ├── Latest Updates on AI Models 2024-05-02.pdf └── Latest Updates on AI Models2024-05-02.md ├── logs └── Agently_daily_news_collector.log ├── prompts ├── create_outline.yaml ├── pick_news.yaml ├── summarize.yaml └── write_column.yaml ├── requirements.txt ├── utils ├── __init__.py ├── logger.py ├── path.py └── yaml_reader.py └── workflows ├── __init__.py ├── column_workflow.py ├── main_workflow.py └── tools ├── __init__.py ├── browse.py └── search.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | # MacOS 163 | .DS_Store 164 | 165 | # Agently 166 | .Agently 167 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | WORKDIR /app 4 | 5 | COPY . . 
6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | CMD ["python", "app.py"] 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |

Agently-Daily-News-Collector

4 | 5 | English | 中文说明 6 | 7 |
8 | 9 | **Agently Daily News Collector** is an open-source LLM-based automatic news collecting workflow showcase project powered by [**_Agently_** AI application development framework](https://github.com/Maplemx/Agently). 10 | 11 | You can use this project to generate almost any topic of news collection. All you need to do is simply input the field topic of your news collection. Then you wait and the AI agents will do their jobs automatically until a high quality news collection is generated and saved into a markdown file. 12 | 13 | News collection file examples: 14 | 15 | `MarkDown File` [Latest Updates on AI Models 2024-05-02](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/examples/Latest%20Updates%20on%20AI%20Models2024-05-02.md) 16 | 17 | `PDF File` [Latest Updates on AI Models 2024-05-02](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/examples/Latest%20Updates%20on%20AI%20Models%202024-05-02.pdf) 18 | 19 | > **ℹ️ Notice:** 20 | > 21 | > Visit https://github.com/Maplemx/Agently if you want to learn more about the **_Agently_** AI application development framework. 22 | 23 | ## How to Use 24 | 25 | ### Step 1: Clone this repo 26 | 27 | Run this command in shell: 28 | 29 | ```shell 30 | git clone git@github.com:AgentEra/Agently-Daily-News-Collector.git 31 | ``` 32 | 33 | ### Step 2: Edit settings YAML file 34 | 35 | You can find the [`SETTINGS.yaml`](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/SETTINGS.yaml) file in the project dir. 36 | 37 | Input your model's API key and change other settings as you wish. 38 | 39 | If you want to use another model, you can read [this document](https://github.com/Maplemx/Agently/blob/main/docs/guidebook/application_development_handbook.ipynb) or [this Agently official website page](http://agently.tech/features/model_request.html) to see how to set the settings. 
40 | 41 | ### Step 3: Start 42 | 43 | Because this project is a Python project, you need to install Python first. You can find installation instruction on [Python official website](https://www.python.org/). 44 | 45 | At the first time to run this project, you should use this command in shell to download and install dependency packages: 46 | 47 | ```shell 48 | pip install -r path/to/project/requirements.txt 49 | ``` 50 | 51 | Wait until the dependency packages are installed then use this command in shell to start the generation process. 52 | 53 | ```shell 54 | python path/to/project/app.py 55 | ``` 56 | 57 | You will see a tip `[Please input the topic of your daily news collection]:`. 58 | 59 | Input your topic idea about the field of news that you want to collect, then you're good to go. 60 | 61 | During the process, there'll be some logs printed to shell to present what tasks are done like this: 62 | 63 | ```shell 64 | 2024-05-02 22:44:27,347 [INFO] [Outline Generated] {'report_title': "Today's news about AI Models Appliaction", 'column_list': [{'column_title': 'Latest News', 'column_requirement': 'The content is related to AI Models Appliaction, and the time is within 24 hours', 'search_keywords': 'AI Models Appliaction news latest'}, {'column_title': 'Hot News', 'column_requirement': 'The content is related to AI Models Appliaction, and the interaction is high', 'search_keywords': 'AI Models Appliaction news hot'}, {'column_title': 'Related News', 'column_requirement': 'The content is related to AI Models Appliaction, but not news', 'search_keywords': 'AI Models Appliaction report'}]} 65 | 2024-05-02 22:44:32,352 [INFO] [Start Generate Column] Latest News 66 | 2024-05-02 22:44:34,132 [INFO] [Search News Count] 8 67 | 2024-05-02 22:44:46,062 [INFO] [Picked News Count] 2 68 | 2024-05-02 22:44:46,062 [INFO] [Summarzing] With Support from AWS, Yseop Develops a Unique Generative AI Application for Regulatory Document Generation Across BioPharma 69 | 2024-05-02 
22:44:52,579 [INFO] [Summarzing] Success 70 | 2024-05-02 22:44:57,580 [INFO] [Summarzing] Over 500 AI models are now optimised for Core Ultra processors, says Intel 71 | 2024-05-02 22:45:02,130 [INFO] [Summarzing] Success 72 | 2024-05-02 22:45:19,475 [INFO] [Column Data Prepared] {'title': 'Latest News', 'prologue': 'Stay up-to-date with the latest advancements in AI technology with these news updates: [Yseop Partners with AWS to Develop Generative AI for BioPharma](https://finance.yahoo.com/news/support-aws-yseop-develops-unique-130000171.html) and [Intel Optimizes Over 500 AI Models for Core Ultra Processors](https://www.business-standard.com/technology/tech-news/over-500-ai-models-are-now-optimised-for-core-ultra-processors-says-intel-124050200482_1.html).', 'news_list': [{'url': 'https://finance.yahoo.com/news/support-aws-yseop-develops-unique-130000171.html', 'title': 'With Support from AWS, Yseop Develops a Unique Generative AI Application for Regulatory Document Generation Across BioPharma', 'summary': "Yseop utilizes AWS to create a new Generative AI application for the Biopharma sector. This application leverages AWS for its scalability and security, and it allows Biopharma companies to bring pharmaceuticals and vaccines to the market more quickly. Yseop's platform integrates LLM models for generating scientific content while meeting the security standards of the pharmaceutical industry.", 'recommend_comment': 'AWS partnership helps Yseop develop an innovative Generative AI application for the BioPharma industry, enabling companies to expedite the delivery of pharmaceuticals and vaccines to market. 
The integration of LLM models and compliance with stringent pharmaceutical industry security standards make this a valuable solution for BioPharma companies.'}, {'url': 'https://www.business-standard.com/technology/tech-news/over-500-ai-models-are-now-optimised-for-core-ultra-processors-says-intel-124050200482_1.html', 'title': 'Over 500 AI models are now optimised for Core Ultra processors, says Intel', 'summary': 'Intel stated over 500 AI models are optimized for Core Ultra processors. These models are accessible from well-known sources like OpenVINO Model Zoo, Hugging Face, ONNX Model Zoo, and PyTorch.', 'recommend_comment': "Intel's optimization of over 500 AI models for Core Ultra processors provides access to a vast selection of pre-trained models from reputable sources. This optimization enhances the performance and efficiency of AI applications, making it easier for developers to deploy AI solutions on Intel-based hardware."}]} 73 | ``` 74 | 75 | Whole process will take some time, so just relax and have some rest☕️. 76 | 77 | ### Step 4: Get your news collection markdown file! 78 | 79 | When the process is done finally, you will see a tip like this with markdown text that generated printed on screen: 80 | 81 | ```shell 82 | 2024-05-02 21:57:20,521 [INFO] [Markdown Generated] 83 | ``` 84 | 85 | Then you can find a markdown file named ` .md` in your project dir. 86 | 87 | Enjoy it! 😄 88 | 89 | --- 90 | 91 | ## Mainly Dependencies 92 | 93 | - **Agently AI Development Framework**: https://github.com/Maplemx/Agently | https://pypi.org/project/Agently/ 94 | - **duckduckgo-search**: https://pypi.org/project/duckduckgo-search/ 95 | - **BeautifulSoup4**: https://pypi.org/project/beautifulsoup4/ 96 | - **PyYAM**L: https://pypi.org/project/pyyaml/ 97 | 98 | --- 99 | 100 | Please ⭐️ this repo and [Agently](https://github.com/Maplemx/Agently) main repo if you like it! Thank you very much! 
101 | 102 | > 💡 Ideas / Bug Report: [Report Issues Here](https://github.com/AgentEra/Agently-Daily-News-Collector/issues) 103 | > 104 | > 📧 Email Us: [developer@agently.cn](mailto:developer@agently.cn) 105 | > 106 | > 👾 Discord Group: 107 | > 108 | > [Click Here to Join](https://discord.gg/4HnarMBpYT) or Scan the QR Code Down Below 109 | > 110 | > image 111 | > 112 | > 💬 WeChat Group(加入微信群): 113 | > 114 | > [Click Here to Apply](https://doc.weixin.qq.com/forms/AIoA8gcHAFMAScAhgZQABIlW6tV3l7QQf) or Scan the QR Code Down Below 115 | > 116 | > image -------------------------------------------------------------------------------- /README_CN.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |

Agently-Daily-News-Collector

4 | 5 |

Agently 新闻汇总报告生成器

6 | 7 | English Introduction | 中文说明 8 | 9 |
10 | 11 | **Agently新闻汇总报告生成器**是一个基于[**_Agently_** AI应用开发框架](https://github.com/Maplemx/Agently)开发的应用项目。本项目构建了**基于大语言模型驱动的全自动工作流**,能够根据用户输入的主题关键词,自动完成新闻汇总报告的结构设计、栏目组稿(含新闻检索、筛查、总结、栏目信息撰写)及报告MarkDown格式文件的输出全过程。同时,本项目**完全开源**,欢迎开发者们通过Fork->PR的方式共同优化。 12 | 13 | 新闻汇总报告的样例可参考: 14 | 15 | `MarkDown文件` [Lastest Updated on AI Models 2024-05-02](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/examples/Latest%20Updates%20on%20AI%20Models2024-05-02.md) 16 | 17 | `PDF文件` [Lastest Updated on AI Models 2024-05-02](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/examples/Latest%20Updates%20on%20AI%20Models%202024-05-02.pdf) 18 | 19 | > 如果您希望进一步了解[**_Agently_** AI应用开发框架](https://github.com/Maplemx/Agently),您可以访问框架的[主仓库地址](https://github.com/Maplemx/Agently)或是[中文官网](http://Agently.cn)阅读更多相关信息,框架提供了丰富的教程和案例,帮助您逐步上手。 20 | 21 | ## 如何使用 22 | 23 | ### 第一步:将本仓库Clone到本地 24 | 25 | 在您的开发目录中使用以下Shell脚本指令: 26 | 27 | ```shell 28 | git clone git@github.com:AgentEra/Agently-Daily-News-Collector.git 29 | ``` 30 | 31 | ### 第二步:修改SETTINGS.yaml设置文件 32 | 33 | 您可以在Clone到本地的项目文件夹中找到[`SETTINGS.yaml`](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/SETTINGS.yaml)这个文件,再根据您的需要修改其中的设置项即可。 34 | 35 | 下面是具体的设置项说明: 36 | 37 | ```yaml 38 | # Debug Settings 39 | IS_DEBUG: false # 如果此项为true,将会输出更多执行过程信息,包括搜索和模型请求的明细信息 40 | # Proxy Settings 41 | PROXY: http://127.0.0.1:7890 # 项目中的搜索和模型请求可能会需要使用前向代理,可以通过此项设置代理信息 42 | # Model Settings 43 | MODEL_PROVIDER: OAIClient #默认使用OpenAI格式的兼容客户端,此客户端能够适配OpenAI以及各类兼容OpenAI格式的本地模型 44 | MODEL_URL: http://base_url_path # 如果您需要修改Base URL,使用此项进行设置 45 | MODEL_AUTH: 46 | api_key: "" # 在这里输入鉴权用的API-Key信息 47 | MODEL_OPTIONS: # 在这里指定模型需要的其他参数,如指定具体的模型,或是调整temperature 48 | model: gpt-3.5-turbo 49 | temperature: 0.8 50 | # Application Settings 51 | MAX_COLUMN_NUM: 3 # 在这里设置汇总报告结构中的专栏数量 52 | OUTPUT_LANGUAGE: Chinese # 在这里设置汇总报告的输出语种,默认为英语,您可能需要手动改成中文 53 | MAX_SEARCH_RESULTS: 8 # 在这里设置每个栏目搜索的最大结果数量 54 | # 
注意,如果数量设置过大,可能会导致超出模型的处理窗口大小,请根据模型具体情况设置 55 | SLEEP_TIME: 5 # 在这里设置每次模型请求后的等待时间,以防止频繁请求导致模型拒绝访问 56 | ``` 57 | 58 | 如果您想要了解切换其他模型的更多细节,可以阅读Agently官方网站关于[模型设置的说明页面](http://agently.tech/features/model_request.html)。 59 | 60 | ### 第三步:启动任务 61 | 62 | 因为本项目为Python项目,您需要在本地安装Python环境。您可以在[Python官方网站](https://www.python.org/)找到适合您的安装方法。 63 | 64 | 然后,在您的项目目录下使用以下Shell脚本指令更新项目依赖包: 65 | 66 | ```shell 67 | pip install -r requirements.txt 68 | ``` 69 | 70 | 依赖包安装完毕后,通过以下Shell脚本指令即可启动: 71 | 72 | ```shell 73 | python app.py 74 | ``` 75 | 76 | 随后您会看到一个提示:`[Please input the topic of your daily news collection]:`。 77 | 78 | 根据提示输入您想要汇总的新闻领域主题关键词,或是用一句话描述您想要生成什么样的新闻汇总报告,然后任务就会开始自动运行了。在这里,您可以输入任何语种的内容,但生成内容的语种会和您在第二步中的设置的语种要求相同。 79 | 80 | 接下来您就可以等待运行的结果了,整个过程大约需要5-8分钟。 81 | 82 | 在运行的过程中,您会看到类似下面展示的输出日志,这些日志将帮助您了解当前在处理的任务,以及运行的关键进展情况: 83 | 84 | ```shell 85 | 2024-05-02 22:44:27,347 [INFO] [Outline Generated] {'report_title': "Today's news about AI Models Appliaction", 'column_list': [{'column_title': 'Latest News', 'column_requirement': 'The content is related to AI Models Appliaction, and the time is within 24 hours', 'search_keywords': 'AI Models Appliaction news latest'}, {'column_title': 'Hot News', 'column_requirement': 'The content is related to AI Models Appliaction, and the interaction is high', 'search_keywords': 'AI Models Appliaction news hot'}, {'column_title': 'Related News', 'column_requirement': 'The content is related to AI Models Appliaction, but not news', 'search_keywords': 'AI Models Appliaction report'}]} 86 | 2024-05-02 22:44:32,352 [INFO] [Start Generate Column] Latest News 87 | 2024-05-02 22:44:34,132 [INFO] [Search News Count] 8 88 | 2024-05-02 22:44:46,062 [INFO] [Picked News Count] 2 89 | 2024-05-02 22:44:46,062 [INFO] [Summarzing] With Support from AWS, Yseop Develops a Unique Generative AI Application for Regulatory Document Generation Across BioPharma 90 | 2024-05-02 22:44:52,579 [INFO] [Summarzing] Success 91 | 2024-05-02 22:44:57,580 [INFO] [Summarzing] 
Over 500 AI models are now optimised for Core Ultra processors, says Intel 92 | 2024-05-02 22:45:02,130 [INFO] [Summarzing] Success 93 | 2024-05-02 22:45:19,475 [INFO] [Column Data Prepared] {'title': 'Latest News', 'prologue': 'Stay up-to-date with the latest advancements in AI technology with these news updates: [Yseop Partners with AWS to Develop Generative AI for BioPharma](https://finance.yahoo.com/news/support-aws-yseop-develops-unique-130000171.html) and [Intel Optimizes Over 500 AI Models for Core Ultra Processors](https://www.business-standard.com/technology/tech-news/over-500-ai-models-are-now-optimised-for-core-ultra-processors-says-intel-124050200482_1.html).', 'news_list': [{'url': 'https://finance.yahoo.com/news/support-aws-yseop-develops-unique-130000171.html', 'title': 'With Support from AWS, Yseop Develops a Unique Generative AI Application for Regulatory Document Generation Across BioPharma', 'summary': "Yseop utilizes AWS to create a new Generative AI application for the Biopharma sector. This application leverages AWS for its scalability and security, and it allows Biopharma companies to bring pharmaceuticals and vaccines to the market more quickly. Yseop's platform integrates LLM models for generating scientific content while meeting the security standards of the pharmaceutical industry.", 'recommend_comment': 'AWS partnership helps Yseop develop an innovative Generative AI application for the BioPharma industry, enabling companies to expedite the delivery of pharmaceuticals and vaccines to market. 
The integration of LLM models and compliance with stringent pharmaceutical industry security standards make this a valuable solution for BioPharma companies.'}, {'url': 'https://www.business-standard.com/technology/tech-news/over-500-ai-models-are-now-optimised-for-core-ultra-processors-says-intel-124050200482_1.html', 'title': 'Over 500 AI models are now optimised for Core Ultra processors, says Intel', 'summary': 'Intel stated over 500 AI models are optimized for Core Ultra processors. These models are accessible from well-known sources like OpenVINO Model Zoo, Hugging Face, ONNX Model Zoo, and PyTorch.', 'recommend_comment': "Intel's optimization of over 500 AI models for Core Ultra processors provides access to a vast selection of pre-trained models from reputable sources. This optimization enhances the performance and efficiency of AI applications, making it easier for developers to deploy AI solutions on Intel-based hardware."}]} 94 | ``` 95 | ### 第四步:得到一份新鲜出炉的新闻汇总报告📰! 96 | 97 | 在整个处理过程结束时,您将会看到类似下方的提示,并可以看到完整的报告MarkDown格式结果被输出到屏幕上: 98 | 99 | ```shell 100 | 2024-05-02 21:57:20,521 [INFO] [Markdown Generated] 101 | ``` 102 | 103 | 同时,您也可以在您的项目文件夹中找到一份命名格式为`<汇总报告名称> <生成日期>.md`的文件。 104 | 105 | 大功告成!🎉 106 | 107 | --- 108 | 109 | ## 常见问题(FAQ) 110 | 111 | **1. 为什么提示请求超时(如:operation timed out,504错误等)或是出现搜索结果为0的情况?** 112 | 113 | 通常情况下是因为无法请求外网造成的,需要通过配置Proxy代理解决,配置方法请参考上文中SETTINGS.yaml的PROXY配置项 114 | 115 | **2. 我用的国内模型,为什么还需要配置Proxy?** 116 | 117 | 本项目默认使用的搜索工具是外网的搜索工具,所以即使模型本身不存在请求访问限制的问题,也需要使用Proxy代理。 118 | 119 | **3. 我机子上开着VPN或者其他代理软件呢,为什么还需要配置Proxy?** 120 | 121 | 因为大部分情况下,在代理软件没有开启特定的全局系统请求代理模式(如Clash的TUN模式)前,Python脚本在运行时,不会使用代理软件在本地启动的代理服务接口来发起网络请求,而是会直接对目标网址进行请求。因此,需要通过配置Proxy来帮助Python脚本调用代理软件在本地启动的代理服务接口。 122 | 123 | 以Clash客户端为例,你可以从这个位置找到代理服务的端口: 124 | 125 | image 126 | 127 | 根据上图,本地的Proxy服务地址应该为http://127.0.0.1:7890 128 | 129 | **4. 
我自己有更好用的搜索/浏览工具,我能不能干脆直接修改掉默认的搜索或者浏览工具?** 130 | 131 | 当然可以,我们在项目中对不同的模块都做了解耦,要修改搜索工具,只需要修改/workflows/tools/search.py文件即可,而要修改浏览工具,只需要修改/workflows/tools/browse.py文件即可。 132 | 133 | 我们也非常欢迎您将自己修改优化的好用的工具PR给项目,分享给更多的人使用,这类贡献者我们还会在项目首页进行署名感谢! 134 | 135 | **5. 都说到这里了,我还能修改/定制/优化哪些地方?** 136 | 137 | 您可以修改的内容包括: 138 | 139 | 1. /workflows 文件夹中的处理工作流程,比如,添加一个愚人节处理工作流,来生成洋葱新闻 140 | 2. /workflows/tools 文件夹中的搜索和浏览工具,或是结合新增处理工作流,添加更多你觉得有用的工具 141 | 3. /prompts 文件夹中的所有Prompt YAML文件,这些文件都非常易读易修改,欢迎您修改出更好的Prompt,或是简单地将英文Prompt调整为中文 142 | 143 | --- 144 | 145 | ## 主要依赖说明 146 | 147 | - Agently AI应用开发框架:https://github.com/Maplemx/Agently | https://pypi.org/project/Agently/ | http://Agently.cn 148 | - duckduckgo-search: https://pypi.org/project/duckduckgo-search/ 149 | - BeautifulSoup4: https://pypi.org/project/beautifulsoup4/ 150 | - PyYAML: https://pypi.org/project/pyyaml/ 151 | 152 | --- 153 | 154 | 如果您喜欢这个项目,请为本项目以及[Agently框架主仓库](https://github.com/Maplemx/Agently)点亮⭐️。 155 | 156 | 如果您希望了解更多关于本项目的线上产品化版本信息,欢迎通过下面的方式加入我们的讨论群,我们将在近期组织线上产品化版本的测试。 157 | 158 | > 💡 意见反馈/Bug提交: [Report Issues Here](https://github.com/AgentEra/Agently-Daily-News-Collector/issues) 159 | > 160 | > 📧 联系我们: [developer@agently.cn](mailto:developer@agently.cn) 161 | > 162 | > 💬 加入微信讨论群: 163 | > 164 | > [点击这里填写申请表](https://doc.weixin.qq.com/forms/AIoA8gcHAFMAScAhgZQABIlW6tV3l7QQf)或扫描下方二维码申请入群 165 | > 166 | > image 167 | -------------------------------------------------------------------------------- /SETTINGS.yaml: -------------------------------------------------------------------------------- 1 | # Debug Settings 2 | IS_DEBUG: false 3 | # Proxy Settings 4 | #MODEL_PROXY: http://127.0.0.1:7890 5 | #TOOL_PROXY: http://127.0.0.1:7890 6 | # Model Settings 7 | MODEL_PROVIDER: OAIClient 8 | #MODEL_URL: 9 | MODEL_AUTH: 10 | api_key: "" 11 | MODEL_OPTIONS: 12 | model: gpt-3.5-turbo 13 | # Application Settings 14 | MAX_COLUMN_NUM: 3 15 | OUTPUT_LANGUAGE: English 16 | MAX_SEARCH_RESULTS: 8 17 | SLEEP_TIME: 5 18 | # News 
time limit: 'd' as day,'w' as week,'m' as month 19 | NEWS_TIME_LIMIT: d 20 | # Outline Settings 21 | USE_CUSTOMIZE_OUTLINE: false 22 | CUSTOMIZE_OUTLINE: 23 | report_title: "Today's News about Large Model Applications" 24 | column_list: 25 | - column_title: New Apps 26 | column_requirement: Looking for those applications powered by large models which announced recently 27 | search_keywords: large model application announce this week 28 | - column_title: Hot Apps 29 | column_requirement: Looking for those applications powered by large models which are most popular or are discussed most 30 | search_keywords: large model application popular hot 31 | - column_title: Fun Apps 32 | column_requirement: Looking for those applications powered by large models which are funny or inspirational 33 | search_keywords: large model application cool fun inspire -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import Agently 2 | import utils.yaml_reader as yaml 3 | from utils.logger import Logger 4 | from workflows import main_workflow 5 | from utils.path import root_path 6 | 7 | # Settings and Logger 8 | SETTINGS = yaml.read("./SETTINGS.yaml") 9 | logger = Logger(console_level = "DEBUG" if SETTINGS.IS_DEBUG else "INFO") 10 | 11 | # Proxy 12 | model_proxy = ( 13 | SETTINGS.MODEL_PROXY 14 | if hasattr(SETTINGS, "MODEL_PROXY") 15 | else 16 | ( 17 | SETTINGS.PROXY 18 | if hasattr(SETTINGS, "PROXY") 19 | else None 20 | ) 21 | ) 22 | 23 | # Agent Factory 24 | agent_factory = ( 25 | Agently.AgentFactory(is_debug=SETTINGS.IS_DEBUG) 26 | .set_settings("current_model", SETTINGS.MODEL_PROVIDER) 27 | .set_settings(f"model.{ SETTINGS.MODEL_PROVIDER }.auth", SETTINGS.MODEL_AUTH) 28 | .set_settings(f"model.{ SETTINGS.MODEL_PROVIDER }.url", SETTINGS.MODEL_URL if hasattr(SETTINGS, "MODEL_URL") else None) 29 | .set_settings(f"model.{ SETTINGS.MODEL_PROVIDER }.options", 
SETTINGS.MODEL_OPTIONS if hasattr(SETTINGS, "MODEL_OPTIONS") else {}) 30 | .set_settings("proxy", model_proxy) 31 | ) 32 | 33 | # Start Workflow 34 | main_workflow.start( 35 | agent_factory=agent_factory, 36 | SETTINGS=SETTINGS, 37 | root_path=root_path, 38 | logger=logger, 39 | ) -------------------------------------------------------------------------------- /examples/Latest Updates on AI Models 2024-05-02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentEra/Agently-Daily-News-Collector/bec8042823ccd6fead496efad480ec4bb56d8b98/examples/Latest Updates on AI Models 2024-05-02.pdf -------------------------------------------------------------------------------- /examples/Latest Updates on AI Models2024-05-02.md: -------------------------------------------------------------------------------- 1 | # Latest Updates on AI Models 2 | 3 | > 2024-05-02 Thursday 4 | 5 | ## Industry Trends 6 | 7 | ### PROLOGUE 8 | 9 | > The selected news articles are related to current trends and developments in the field of AI models. They cover various aspects of AI implementation across industries, from real-time pharma news delivery and control room optimization to accelerated adoption of AI and competitive dynamics in the AI sector. The articles provide insights into how AI is shaping operational efficiency, innovation, decision-making, and industry-specific tasks, reflecting the increasing utilization of AI technology for growth and productivity enhancement. 10 | 11 | ### NEWS LIST 12 | 13 | - [AppliedXL Collaborates with Bloomberg to Provide AI-Powered, Real-Time Pharma News on the Bloomberg Terminal](https://www.lelezard.com/en/news-21360719.html) 14 | 15 | - `[summray]` AppliedXL collaborates with Bloomberg to provide AI-powered, real-time pharma news on the Bloomberg Terminal. The collaboration aims to deliver key insights to help users stay ahead of catalyst events in the pharmaceutical industry. 
AppliedXL's AI technology analyzes live public data to uncover signals and trends, which are then distilled into early news stories included in real-time news feeds for early signal detection and market analysis. The collaboration focuses on the life sciences and biopharma space, alerting users to irregularities in clinical trial progressions and other market-moving events. AppliedXL's AI technology combines machine learning and human expertise to provide precise and contextualized information efficiently. 16 | - `[comment]` This news article discusses how AppliedXL collaborates with Bloomberg to provide AI-powered, real-time pharma news on the Bloomberg Terminal. The collaboration aims to deliver key insights to help users stay ahead of catalyst events in the pharmaceutical industry, showcasing the use of AI in delivering real-time industry updates. 17 | 18 | - [AI for control rooms](https://www.symmetrymagazine.org/article/ai-for-control-rooms?language_content_entity=und) 19 | 20 | - `[summray]` AI is being utilized in control rooms within the fields of particle physics and astrophysics to assist with complex tasks. From machine learning algorithms helping to keep particle beams flowing in accelerators to optimizing telescope scheduling for studying galaxies, AI is proving to be a valuable tool for scientists. Additionally, AI is being developed to aid electric grid operators in managing the increasing number of energy resources connecting to the grid. The goal is not to replace human operators but to enhance decision-making by presenting them with the best tool options immediately and learning from human feedback. 21 | - `[comment]` The article highlights the use of AI in control rooms within particle physics and astrophysics, assisting with complex tasks. It showcases how AI is enhancing decision-making and presenting the best tool options immediately to human operators, aligning with the current trend of utilizing AI to optimize processes. 
22 | 23 | - [Six AI industry trends we're tracking in 2024 (and beyond)](https://diginomica.com/six-ai-industry-trends-were-tracking-2024-and-beyond) 24 | 25 | - `[summray]` In 2024, the adoption of AI across industries has accelerated significantly, with projections indicating that by 2040, 1.3 million businesses will be utilizing AI to drive innovation. Various sectors such as the telecom industry, manufacturing, energy, utilities, construction, asset-centric service providers, and defense companies are leveraging AI and automation to enhance operational efficiency, drive performance, accelerate evolution, alleviate challenges, transform fleet management, and strengthen cybersecurity. As organizations invest in advanced technology like AI to optimize processes and automate industry-specific tasks, the potential for growth and productivity enhancement is vast, signaling a shift towards more resilient and digitally transformed operations. 26 | - `[comment]` The content discusses the accelerated adoption of AI across various industries, enhancing operational efficiency and driving innovation. It reflects the trend of organizations investing in advanced technology like AI to optimize processes and automate industry-specific tasks for enhanced growth and productivity. 27 | 28 | - [Microsoft's Fear Of Google's AI Dominance Led To OpenAI Investment, Internal Email Reveals: 'We're Multiple Years Behind The Competition'](https://www.benzinga.com/news/24/05/38582364/microsofts-fear-of-googles-ai-dominance-led-to-openai-investment-internal-email-reveals-were-multipl) 29 | 30 | - `[summray]` An internal email from Microsoft Corp. revealed that the company's investment in OpenAI was motivated by the fear of falling behind Google in AI capabilities. Microsoft's chief technology officer Kevin Scott expressed concerns about the lack of machine learning scale, infrastructure, and development speed compared to Google and OpenAI. 
The email highlighted the intense competition in the AI space, with Microsoft investing over $13 billion in OpenAI to enhance various services. The email sheds light on the rivalry between Microsoft and Google in the AI sector, with Google introducing Bard (now Gemini) to compete with OpenAI's ChatGPT, facing some challenges during the launch. This news article reflects the current trends and developments in AI models and the competitive landscape in the industry. 31 | - `[comment]` The news reveals Microsoft's investment in OpenAI motivated by the fear of falling behind Google in AI capabilities. It sheds light on the intense competition in the AI space, showcasing the current trends and developments in AI models and the competitive landscape in the industry. 32 | 33 | - [Q1 2024 Cognizant Technology Solutions Corp Earnings Call](https://finance.yahoo.com/news/q1-2024-cognizant-technology-solutions-123608449.html) 34 | 35 | - `[summray]` Cognizant Technology Solutions reported on their Q1 2024 Earnings Call, highlighting progress against strategic priorities in a challenging demand environment. They delivered revenue growth exceeding guidance, expanded adjusted operating margin, and noted improvements in voluntary attrition. The company saw sequential growth in Health Sciences and Communications, Media and Technology, with declines in Financial Services. The demand environment remains uncertain, shifting client spending to cost-saving projects. Cognizant focuses on innovation, including AI, cloud, and digital technologies. They mentioned partnerships with Microsoft, Google Cloud, and NVIDIA for AI initiatives. The company emphasized the importance of collaboration, cited recognition for innovation, and highlighted their Bluebolt grassroots initiative. Overall, they aim to increase revenue growth, become an employer of choice, and simplify operations. 
36 | - `[comment]` Cognizant Technology Solutions' Q1 2024 Earnings Call highlights their focus on innovation, including AI, cloud, and digital technologies. The partnerships with Microsoft, Google Cloud, and NVIDIA for AI initiatives showcase the ongoing trend of companies leveraging AI for growth and becoming employers of choice. 37 | 38 | ## Innovations and Research 39 | 40 | ### PROLOGUE 41 | 42 | > Recent innovations and breakthroughs in the AI models domain are highlighted in the selected news articles. China's advancements in AI technologies, including the SenseNova 5.0 large language model and Vidu text-to-video AI tool, demonstrate the country's commitment to cutting-edge AI developments. Additionally, the rise of generative AI is emphasized as a key trend for driving innovation and organizational growth. Furthermore, a team of researchers has outlined guidelines for the responsible use of machine learning in science, aiming to enhance credibility and reproducibility in research. Explore more about these advancements and guidelines in the following articles: [China's AI Advances](https://swarajyamag.com/technology/chinas-ai-advances-that-are-flying-under-the-radar), [Generative AI's Exponential Potential](https://www.forbes.com/sites/forbestechcouncil/2024/05/02/innovators-should-seize-on-generative-ais-exponential-potential/), [Science's AI Problem](https://www.sciencedaily.com/releases/2024/05/240501153055.htm) 43 | 44 | ### NEWS LIST 45 | 46 | - [China's AI Advances That Are Flying Under The Radar](https://swarajyamag.com/technology/chinas-ai-advances-that-are-flying-under-the-radar) 47 | 48 | - `[summray]` China is making significant advancements in Artificial Intelligence (AI), with recent releases rivalling those in the United States. SenseTime unveiled the SenseNova 5.0 large language model (LLM) with impressive capabilities in knowledge, mathematics, reasoning, and coding. 
The model surpasses OpenAI's GPT-4 Turbo and tops various multimodal benchmarks. Another innovation is Vidu, a text-to-video AI tool that can generate 16-second videos based on simple text prompts. Additionally, Stardust Intelligence introduced the Astribot S1 humanoid robot, capable of performing household chores and imitating human movements. China is demonstrating seriousness in its AI ambitions, with over 40 approved AI models for public use and a vision to empower billions of people with AI robot assistants. 49 | - `[comment]` China's advancements in AI, such as the SenseNova 5.0 large language model and Vidu text-to-video AI tool, showcase the country's commitment to innovative technologies in the AI domain. 50 | 51 | - [Innovators Should Seize On Generative AI's Exponential Potential](https://www.forbes.com/sites/forbestechcouncil/2024/05/02/innovators-should-seize-on-generative-ais-exponential-potential/) 52 | 53 | - `[summray]` Generative AI is identified as a significant trend in the tech industry that necessitates rapid adaptation. The market for generative AI is projected to grow rapidly, with organizations investing in the technology to drive innovation. McKinsey details how generative AI can accelerate organizational growth by rapidly processing information, writing code for self-improvement, and enhancing competitive edge. By utilizing generative AI tools tailored for each phase of innovation, organizations can revamp their innovation processes to tap into the technology's potential. The importance of experimentation, prototyping, and scaling is emphasized, with generative AI offering various tools to aid in these processes. The democratization of innovation across employees and the augmentation of emerging technologies hold promise for accelerating organization's adaptability and competitiveness in leveraging generative AI for innovation. 
54 | - `[comment]` Generative AI is a crucial trend in tech, with potential to drive rapid innovation and organizational growth. Organizations should leverage generative AI tools for revolutionizing their innovation processes. 55 | 56 | - [Science has an AI problem: This group says they can fix it](https://www.sciencedaily.com/releases/2024/05/240501153055.htm) 57 | 58 | - `[summray]` An interdisciplinary team of 19 researchers, led by Princeton University computer scientists Arvind Narayanan and Sayash Kapoor, has published guidelines for the responsible use of machine learning in science to address the credibility crisis in research caused by deep flaws in machine learning methods. The guidelines focus on transparency and integrity, calling for detailed descriptions of machine learning models, code, data, hardware specifications, experimental design, and project goals. The aim is to ensure reproducibility of results, validate claims, and accelerate scientific progress by improving the quality of published papers. 59 | - `[comment]` A team of researchers has provided guidelines for responsible use of machine learning in science to address credibility issues. Transparency and integrity in machine learning models are crucial for reproducibility of results and scientific progress. 60 | 61 | ## Future Outlook 62 | 63 | ### PROLOGUE 64 | 65 | > The following article delves into the future prospects, challenges, and potential advancements of AI models in the context of business operations and employee dynamics. It explores the impact of technological advancements, particularly Artificial Intelligence (AI), on businesses and employees, focusing on layoffs resulting from automation. 
For more information, you can visit the article [Layoffs in the wake of technological advancements: The inherent benefits for businesses and employees](https://www.ghanaweb.com/GhanaHomePage/business/Layoffs-in-the-wake-of-technological-advancements-The-inherent-benefits-for-businesses-and-employees-1928854). 66 | 67 | ### NEWS LIST 68 | 69 | - [Layoffs in the wake of technological advancements: The inherent benefits for businesses and employees](https://www.ghanaweb.com/GhanaHomePage/business/Layoffs-in-the-wake-of-technological-advancements-The-inherent-benefits-for-businesses-and-employees-1928854) 70 | 71 | - `[summray]` The article discusses the impact of technological advancements, particularly Artificial Intelligence (AI), on businesses and employees, focusing on layoffs as a result of automation. It highlights the benefits and challenges of AI in the workplace, such as increased productivity, job displacement, and layoff exercises. The causes of layoffs, including economic downturns, technological advancements, restructuring, shifting consumer preferences, and cost-saving measures, are explored, along with the opportunities they bring for businesses. Additionally, the article outlines the benefits of layoffs for employees, such as severance packages, career reevaluation, increased market value, networking opportunities, personal growth, and entrepreneurial opportunities. 72 | - `[comment]` The article provides insights into the impact of technological advancements, specifically AI, on businesses and employees, highlighting the challenges and benefits associated with layoffs. It is a relevant read for understanding the future prospects of AI models in the workplace. 
73 | 74 | 75 | 76 | --- 77 | 78 | Powered by [Agently AI Application Development Framework & Agently Workflow](https://github.com/Maplemx/Agently) 79 | 80 | Model Information:OAIClient - {'model': 'gpt-3.5-turbo'} 81 | 82 | **_Agently_** [Guidebook](https://github.com/Maplemx/Agently/blob/main/docs/guidebook) 83 | 84 | [Apply Developers WeChat Group](https://doc.weixin.qq.com/forms/AIoA8gcHAFMAScAhgZQABIlW6tV3l7QQf) or Scan QR Code to Apply. 85 | 86 | image -------------------------------------------------------------------------------- /logs/Agently_daily_news_collector.log: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /prompts/create_outline.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | topic: ${topic} 3 | news_time_limit: ${news_time_limit} 4 | instruct: 5 | task: prepare news collection outline according {input.topic} and {input.news_time_limit} 6 | output language: ${language} 7 | output: 8 | report_title: 9 | $type: str 10 | $desc: generate a title for this news collection like "news about sports", "news about finance" 11 | column_list: 12 | $type: 13 | - column_title: 14 | $type: str 15 | $desc: title of this column even if this is the only one column 16 | column_requirement: 17 | $type: str 18 | $desc: describe recheck standard about the contents in this column to make sure all contents are aimed at the requirement of {input}'s topic 19 | search_keywords: 20 | $type: str 21 | $desc: search keywords for this column split by space. make sure the field keyword about {input} is included in keywords. 
22 | $desc: the number of columns <= ${max_column_num} -------------------------------------------------------------------------------- /prompts/pick_news.yaml: -------------------------------------------------------------------------------- 1 | input: ${column_news} 2 | instruct: 3 | news select rules: 4 | - ${column_requirement} 5 | - if several news are similar, just retain the one with most famous source and output {can_use} as false for others 6 | output: 7 | - id: 8 | $type: int 9 | $desc: value from {input.[].id} 10 | can_use: 11 | $type: bool 12 | $desc: judge if {input.brief} can be used according {instruct} 13 | recommend_comment: 14 | $type: str 15 | $desc: provide your recommend comment if {can_use} == true, or just output null -------------------------------------------------------------------------------- /prompts/summarize.yaml: -------------------------------------------------------------------------------- 1 | input: ${news_content} 2 | info: 3 | column requirement: ${column_requirement} 4 | news title: ${news_title} 5 | instruct: 6 | output language: ${language} 7 | summary rule: 8 | - find and summarize the main content part of the news content which is collected from webpage 9 | - summary focus on relative content to {column requirement} and {news title} 10 | - summary in one paragraph without linebreak 11 | output: 12 | can_summarize: 13 | $type: bool 14 | $desc: judge if {input} has enough relative content to be summarized 15 | translated_title: 16 | $type: str 17 | $desc: translate {input.news title} into ${language} 18 | summary: 19 | $type: str 20 | $desc: summarize {input} according {info} and {instruct} if {can_summarize} == true, or output null 21 | -------------------------------------------------------------------------------- /prompts/write_column.yaml: -------------------------------------------------------------------------------- 1 | input: ${slimmed_news} 2 | info: 3 | column requirement: ${column_requirement} 4 | instruct: 5 | 
news select rules: 5 | - if there're several similar content news, only select one of them into {news_list} 7 | - all news selected must follow or be relative to {column requirement} 8 | output language: ${language} 9 | output: 10 | news_list: 11 | $type: 12 | - id: 13 | $type: int 14 | $desc: value from {input.[].id} 15 | recommend_comment: 16 | $type: str 17 | $desc: provide your recommend comment of this news according your role and {column requirement} 18 | $desc: select news into column list according {news select rules} from {input} 19 | prologue: 20 | $type: str 21 | $desc: write a prologue for readers according {news_list} and {news select rules}, you can use [](news url) to mark key information -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Agently==3.3.4.7 2 | PyYAML==6.0.1 3 | duckduckgo_search>=5.3.0 4 | beautifulsoup4>=4.12.3 5 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentEra/Agently-Daily-News-Collector/bec8042823ccd6fead496efad480ec4bb56d8b98/utils/__init__.py -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | logging.getLogger().setLevel(logging.NOTSET) 5 | 6 | class Logger(object): 7 | def __init__(self, **kwargs): 8 | name = kwargs.get("name", "Agently-Daily-News-Collector") 9 | log_level = kwargs.get("log_level", "ERROR") 10 | console_level = kwargs.get("console_level", "INFO") 11 | log_format = kwargs.get("format", "%(asctime)s\t[%(levelname)s]\t%(message)s") 12 | log_path = kwargs.get("path", "./logs/Agently_daily_news_collector.log") 13 | handlers = 
kwargs.get("handlers", []) 14 | self.logger = logging.getLogger(name) 15 | if self.logger.hasHandlers(): 16 | self.logger.handlers.clear() 17 | stream_handler = logging.StreamHandler() 18 | stream_handler.setLevel(getattr(logging, console_level)) 19 | stream_handler.setFormatter(logging.Formatter(log_format)) 20 | self.logger.addHandler(stream_handler) 21 | file_handler = logging.FileHandler(log_path) 22 | file_handler.setLevel(getattr(logging, log_level)) 23 | file_handler.setFormatter(logging.Formatter(log_format)) 24 | self.logger.addHandler(file_handler) 25 | for handler in handlers: 26 | self.logger.addHandler(handler) 27 | 28 | def __transform(self, *args, **kwargs): 29 | message = "" 30 | for arg in args: 31 | message += f"{ arg }\t" 32 | message = message[:-1] 33 | kwargs_to_list = [] 34 | kwargs_message = "" 35 | for key, value in kwargs.items(): 36 | kwargs_to_list.append(f"{ key }: { str(value) }") 37 | kwargs_message += "\t".join(kwargs_to_list) 38 | if kwargs_message != "": 39 | message += f"\t{ kwargs_message }" 40 | return message 41 | 42 | def debug(self, *args, **kwargs): 43 | return self.logger.debug(self.__transform(*args, **kwargs)) 44 | 45 | def info(self, *args, **kwargs): 46 | return self.logger.info(self.__transform(*args, **kwargs)) 47 | 48 | def warning(self, *args, **kwargs): 49 | return self.logger.warning(self.__transform(*args, **kwargs)) 50 | 51 | def error(self, *args, **kwargs): 52 | return self.logger.error(self.__transform(*args, **kwargs)) 53 | 54 | def critical(self, *args, **kwargs): 55 | return self.logger.critical(self.__transform(*args, **kwargs)) 56 | 57 | logger = Logger() -------------------------------------------------------------------------------- /utils/path.py: -------------------------------------------------------------------------------- 1 | import os 2 | root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -------------------------------------------------------------------------------- 
/utils/yaml_reader.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from types import SimpleNamespace 3 | 4 | class YAMLResult(SimpleNamespace): 5 | pass 6 | 7 | def read(yaml_path:str): 8 | try: 9 | with open(yaml_path, "r") as yaml_file: 10 | yaml_dict = yaml.safe_load(yaml_file) 11 | return YAMLResult(**yaml_dict) 12 | except Exception as e: 13 | raise Exception(f"[YAML Reader] Error occured when read YAML from path '{ yaml_path }'.\nError: { str(e) }") -------------------------------------------------------------------------------- /workflows/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentEra/Agently-Daily-News-Collector/bec8042823ccd6fead496efad480ec4bb56d8b98/workflows/__init__.py -------------------------------------------------------------------------------- /workflows/column_workflow.py: -------------------------------------------------------------------------------- 1 | import time 2 | import Agently 3 | from .tools.search import search 4 | from .tools.browse import browse 5 | 6 | def start(column_outline, *, agent_factory, SETTINGS, root_path, logger): 7 | tool_proxy = ( 8 | SETTINGS.TOOL_PROXY 9 | if hasattr(SETTINGS, "TOOL_PROXY") 10 | else 11 | ( 12 | SETTINGS.PROXY 13 | if hasattr(SETTINGS, "PROXY") 14 | else None 15 | ) 16 | ) 17 | logger.info("[Start Generate Column]", column_outline["column_title"]) 18 | column_workflow = Agently.Workflow() 19 | column_editor_agent = agent_factory.create_agent() 20 | # You can set column editor agent here, read https://github.com/Maplemx/Agently/tree/main/docs/guidebook to explore 21 | """ 22 | ( 23 | column_editor_agent 24 | .set_role("...") 25 | .set_user_info("...") 26 | ) 27 | """ 28 | 29 | # Define Workflow Chunks 30 | @column_workflow.chunk("start", type="Start") 31 | 32 | @column_workflow.chunk("search") 33 | def search_executor(inputs, storage): 34 | storage.set( 35 
| "searched_news", 36 | search( 37 | column_outline["search_keywords"], 38 | timelimit=SETTINGS.NEWS_TIME_LIMIT if hasattr(SETTINGS, "NEWS_TIME_LIMIT") else "d", 39 | proxy=tool_proxy, 40 | logger=logger, 41 | ) 42 | ) 43 | 44 | @column_workflow.chunk("pick_news") 45 | def pick_news_executor(inputs, storage): 46 | searched_news = storage.get("searched_news", []) 47 | logger.info("[Search News Count]", len(searched_news)) 48 | if len(searched_news) > 0: 49 | pick_results = ( 50 | column_editor_agent 51 | .load_yaml_prompt( 52 | path=f"{ root_path }/prompts/pick_news.yaml", 53 | variables={ 54 | "column_news": searched_news, 55 | "column_requirement": column_outline["column_requirement"], 56 | } 57 | ) 58 | .start() 59 | ) 60 | # sleep to avoid requesting too often 61 | time.sleep(SETTINGS.SLEEP_TIME) 62 | picked_news = [] 63 | for pick_result in pick_results: 64 | if pick_result["can_use"]: 65 | news = searched_news[int(pick_result["id"])].copy() 66 | news.update({ "recommend_comment": pick_result["recommend_comment"] }) 67 | picked_news.append(news) 68 | storage.set("picked_news", picked_news) 69 | logger.info("[Picked News Count]", len(picked_news)) 70 | else: 71 | storage.set("picked_news", []) 72 | logger.info("[Picked News Count]", 0) 73 | 74 | @column_workflow.chunk("read_and_summarize") 75 | def read_and_summarize_executor(inputs, storage): 76 | picked_news = storage.get("picked_news", []) 77 | readed_news = [] 78 | if picked_news and len(picked_news) > 0: 79 | for news in picked_news: 80 | logger.info("[Summarzing]", news["title"]) 81 | news_content = browse( 82 | news["url"], 83 | proxy=tool_proxy, 84 | logger=logger, 85 | ) 86 | if news_content and news_content != "": 87 | try: 88 | summary_result = ( 89 | column_editor_agent 90 | .load_yaml_prompt( 91 | path=f"{ root_path }/prompts/summarize.yaml", 92 | variables={ 93 | "news_content": news_content, 94 | "column_requirement": column_outline["column_requirement"], 95 | "news_title": news["title"], 96 | 
"language": SETTINGS.OUTPUT_LANGUAGE, 97 | } 98 | ) 99 | .start() 100 | ) 101 | if summary_result["can_summarize"]: 102 | readed_news_info = news.copy() 103 | readed_news_info.update({ 104 | "title": summary_result["translated_title"], 105 | "summary": summary_result["summary"] 106 | }) 107 | readed_news.append(readed_news_info) 108 | logger.info("[Summarzing]", "Success") 109 | else: 110 | logger.info("[Summarzing]", "Failed") 111 | # sleep to avoid requesting too often 112 | time.sleep(SETTINGS.SLEEP_TIME) 113 | except Exception as e: 114 | logger.error(f"[Summarzie]: Can not summarize '{ news['title'] }'.\tError: { str(e) }") 115 | storage.set("readed_news", readed_news) 116 | 117 | @column_workflow.chunk("write_column") 118 | def write_column_executor(inputs, storage): 119 | readed_news = storage.get("readed_news", []) 120 | if readed_news and len(readed_news) > 0: 121 | slimmed_news = [] 122 | for index, news in enumerate(readed_news): 123 | slimmed_news.append({ 124 | "id": index, 125 | "title": news["title"], 126 | "summary": news["summary"], 127 | "url": news["url"], 128 | }) 129 | column_result = ( 130 | column_editor_agent 131 | .load_yaml_prompt( 132 | path=f"{ root_path }/prompts/write_column.yaml", 133 | variables={ 134 | "slimmed_news": slimmed_news, 135 | "column_requirement": column_outline["column_requirement"], 136 | "language": SETTINGS.OUTPUT_LANGUAGE, 137 | } 138 | ) 139 | .start() 140 | ) 141 | # sleep to avoid requesting too often 142 | time.sleep(SETTINGS.SLEEP_TIME) 143 | final_news_list = [] 144 | for news in column_result["news_list"]: 145 | id = news["id"] 146 | final_news_list.append({ 147 | "url": readed_news[id]["url"], 148 | "title": readed_news[id]["title"], 149 | "summary": readed_news[id]["summary"], 150 | "recommend_comment": news["recommend_comment"], 151 | }) 152 | storage.set("final_result", { 153 | "title": column_outline["column_title"], 154 | "prologue": column_result["prologue"], 155 | "news_list": final_news_list, 156 | 
}) 157 | else: 158 | storage.set("final_result", None) 159 | 160 | # Connect Chunks 161 | ( 162 | column_workflow.chunks["start"] 163 | .connect_to(column_workflow.chunks["search"]) 164 | .connect_to(column_workflow.chunks["pick_news"]) 165 | .connect_to(column_workflow.chunks["read_and_summarize"]) 166 | .connect_to(column_workflow.chunks["write_column"]) 167 | ) 168 | 169 | # Start Workflow 170 | column_workflow.start() 171 | 172 | return column_workflow.executor.store.get("final_result") 173 | -------------------------------------------------------------------------------- /workflows/main_workflow.py: -------------------------------------------------------------------------------- 1 | import time 2 | import Agently 3 | from datetime import datetime 4 | from .column_workflow import start as start_column_workflow 5 | 6 | def start(*, agent_factory, SETTINGS, root_path, logger): 7 | main_workflow = Agently.Workflow() 8 | chief_editor_agent = agent_factory.create_agent() 9 | # You can set chief editor agent here, read https://github.com/Maplemx/Agently/tree/main/docs/guidebook to explore 10 | """ 11 | ( 12 | chief_editor_agent 13 | .set_role("...") 14 | .set_user_info("...") 15 | ) 16 | """ 17 | 18 | # Define Workflow Chunks 19 | @main_workflow.chunk("start", type="Start") 20 | 21 | @main_workflow.chunk("input_topic") 22 | def input_topic_executor(inputs, storage): 23 | if not SETTINGS.USE_CUSTOMIZE_OUTLINE: 24 | storage.set( 25 | "topic", 26 | input("[Please input the topic of your news collection]: ") 27 | ) 28 | 29 | @main_workflow.chunk("generate_outline") 30 | def generate_outline_executor(inputs, storage): 31 | if SETTINGS.USE_CUSTOMIZE_OUTLINE: 32 | storage.set("outline", SETTINGS.CUSTOMIZE_OUTLINE) 33 | logger.info("[Use Customize Outline]", SETTINGS.CUSTOMIZE_OUTLINE) 34 | else: 35 | # Load prompt from /prompts/create_outline.yaml 36 | outline = ( 37 | chief_editor_agent 38 | .load_yaml_prompt( 39 | path=f"{ root_path }/prompts/create_outline.yaml", 40 | 
variables={ 41 | "topic": storage.get("topic"), 42 | "news_time_limit": SETTINGS.NEWS_TIME_LIMIT if hasattr(SETTINGS, "NEWS_TIME_LIMIT") else "d", 43 | "language": SETTINGS.OUTPUT_LANGUAGE, 44 | "max_column_num": SETTINGS.MAX_COLUMN_NUM, 45 | } 46 | ) 47 | .start() 48 | ) 49 | storage.set("outline", outline) 50 | logger.info("[Outline Generated]", outline) 51 | # sleep to avoid requesting too often 52 | time.sleep(SETTINGS.SLEEP_TIME) 53 | 54 | @main_workflow.chunk("generate_columns") 55 | def generate_columns_executor(inputs, storage): 56 | columns_data = [] 57 | outline = storage.get("outline") 58 | for column_outline in outline["column_list"]: 59 | column_data = start_column_workflow( 60 | column_outline=column_outline, 61 | agent_factory=agent_factory, 62 | SETTINGS=SETTINGS, 63 | root_path=root_path, 64 | logger=logger, 65 | ) 66 | if column_data: 67 | columns_data.append(column_data) 68 | logger.info("[Column Data Prepared]", column_data) 69 | storage.set("columns_data", columns_data) 70 | 71 | @main_workflow.chunk("generate_markdown") 72 | def generate_markdown_executor(inputs, storage): 73 | outline = storage.get("outline") 74 | columns_data = storage.get("columns_data") 75 | if columns_data and len(columns_data) > 0: 76 | # Main Title 77 | md_doc_text = f'# { outline["report_title"] }\n\n' 78 | md_doc_text += f'> { datetime.now().strftime("%Y-%m-%d %A") }\n\n' 79 | # Columns 80 | if SETTINGS.IS_DEBUG: 81 | logger.debug("[Columns Data]", columns_data) 82 | for column_data in columns_data: 83 | md_doc_text += f'## { column_data["title"] }\n\n### PROLOGUE\n\n' 84 | md_doc_text += f'> { column_data["prologue"] }\n\n' 85 | md_doc_text += f"### NEWS LIST\n\n" 86 | for single_news in column_data["news_list"]: 87 | md_doc_text += f'- [{ single_news["title"] }]({ single_news["url"] })\n\n' 88 | md_doc_text += f' - `[summray]` { single_news["summary"] }\n' 89 | md_doc_text += f' - `[comment]` { single_news["recommend_comment"] }\n\n' 90 | # Tailer 91 | md_doc_text 
+="\n\n---\n\nPowered by [Agently AI Application Development Framework & Agently Workflow](https://github.com/Maplemx/Agently)\n\n" 92 | md_doc_text += f"Model Information:{ SETTINGS.MODEL_PROVIDER if hasattr(SETTINGS, 'MODEL_PROVIDER') else 'OpenAI' } - { str(SETTINGS.MODEL_OPTIONS) if hasattr(SETTINGS, 'MODEL_OPTIONS') else 'Default Options' }\n\n" 93 | md_doc_text += '**_Agently_** [Guidebook](https://github.com/Maplemx/Agently/blob/main/docs/guidebook)\n\n[Apply Developers WeChat Group](https://doc.weixin.qq.com/forms/AIoA8gcHAFMAScAhgZQABIlW6tV3l7QQf) or Scan QR Code to Apply.\n\nimage' 94 | logger.info("[Markdown Generated]", md_doc_text) 95 | with open(f'{ root_path }/{ outline["report_title"] }_{ datetime.now().strftime("%Y-%m-%d") }.md', 'w', encoding='utf-8') as f: 96 | f.write(md_doc_text) 97 | else: 98 | logger.info("[Markdown Generation Failed] Due to have not any column data.") 99 | 100 | # Connect Chunks 101 | ( 102 | main_workflow.chunks["start"] 103 | .connect_to(main_workflow.chunks["input_topic"]) 104 | .connect_to(main_workflow.chunks["generate_outline"]) 105 | .connect_to(main_workflow.chunks["generate_columns"]) 106 | .connect_to(main_workflow.chunks["generate_markdown"]) 107 | ) 108 | 109 | # Start Workflow 110 | main_workflow.start() -------------------------------------------------------------------------------- /workflows/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentEra/Agently-Daily-News-Collector/bec8042823ccd6fead496efad480ec4bb56d8b98/workflows/tools/__init__.py -------------------------------------------------------------------------------- /workflows/tools/browse.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | from bs4 import BeautifulSoup 4 | 5 | def browse(url, *, logger=None, proxy=None): 6 | content = "" 7 | try: 8 | request_options = { 9 | "headers": { 
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" } 10 | } 11 | if proxy: 12 | if proxy.startswith("http:"): 13 | request_options.update({ "proxies": { "http": proxy } }) 14 | elif proxy.startswith("https:"): 15 | request_options.update({ "proxies": { "https": proxy } }) 16 | page = requests.get( 17 | url, 18 | **request_options 19 | ) 20 | soup = BeautifulSoup(page.content, "html.parser") 21 | # find text in p, list, pre (github code), td 22 | chunks = soup.find_all(["h1", "h2", "h3", "h4", "h5", "p", "pre", "td"]) 23 | for chunk in chunks: 24 | if chunk.name.startswith("h"): 25 | content += "#" * int(chunk.name[-1]) + " " + chunk.get_text() + "\n" 26 | else: 27 | text = chunk.get_text() 28 | if text and text != "": 29 | content += text + "\n" 30 | # find text in div that class=content 31 | divs = soup.find("div", class_="content") 32 | if divs: 33 | chunks_with_text = divs.find_all(text=True) 34 | for chunk in chunks_with_text: 35 | if isinstance(chunk, str) and chunk.strip(): 36 | content += chunk.strip() + "\n" 37 | content = re.sub(r"\n+", "\n", content) 38 | return content 39 | except Exception as e: 40 | if logger: 41 | logger.error(f"[Browse]: Can not browse '{ url }'.\tError: { str(e) }") 42 | return "" -------------------------------------------------------------------------------- /workflows/tools/search.py: -------------------------------------------------------------------------------- 1 | from duckduckgo_search import DDGS 2 | 3 | def search(keywords, **kwargs): 4 | results = [] 5 | try: 6 | with DDGS(proxy=kwargs.get("proxy", None)) as ddgs: 7 | for index, result in enumerate( 8 | ddgs.news( 9 | keywords, 10 | max_results=kwargs.get("max_results", 8), 11 | timelimit=kwargs.get("timelimit", "d"), 12 | ) 13 | ): 14 | results.append({ 15 | "id": index, 16 | "title": result["title"], 17 | "brief": result["body"], 18 | "url": result["url"], 19 | "source": result["source"], 
20 | "date": result["date"], 21 | }) 22 | return results 23 | except Exception as e: 24 | if "logger" in kwargs: 25 | kwargs["logger"].error(f"[Search]: Can not search '{ keywords }'.\tError: { str(e) }") 26 | return [] 27 | --------------------------------------------------------------------------------