├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── README_CN.md ├── SETTINGS.yaml ├── app.py ├── examples ├── Latest Updates on AI Models 2024-05-02.pdf └── Latest Updates on AI Models2024-05-02.md ├── logs └── Agently_daily_news_collector.log ├── prompts ├── create_outline.yaml ├── pick_news.yaml ├── summarize.yaml └── write_column.yaml ├── requirements.txt ├── utils ├── __init__.py ├── logger.py ├── path.py └── yaml_reader.py └── workflows ├── __init__.py ├── column_workflow.py ├── main_workflow.py └── tools ├── __init__.py ├── browse.py └── search.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | # MacOS 163 | .DS_Store 164 | 165 | # Agently 166 | .Agently 167 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | 3 | WORKDIR /app 4 | 5 | COPY . . 
6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | CMD ["python", "app.py"] 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |

Agently-Daily-News-Collector

4 | 5 | English | 中文说明 6 | 7 |
8 | 9 | **Agently Daily News Collector** is an open-source LLM-based automatic news collecting workflow showcase project powered by [**_Agently_** AI application development framework](https://github.com/Maplemx/Agently). 10 | 11 | You can use this project to generate almost any topic of news collection. All you need to do is simply input the field topic of your news collection. Then you wait and the AI agents will do their jobs automatically until a high quality news collection is generated and saved into a markdown file. 12 | 13 | News collection file examples: 14 | 15 | `MarkDown File` [Latest Updates on AI Models 2024-05-02](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/examples/Latest%20Updates%20on%20AI%20Models2024-05-02.md) 16 | 17 | `PDF File` [Latest Updates on AI Models 2024-05-02](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/examples/Latest%20Updates%20on%20AI%20Models%202024-05-02.pdf) 18 | 19 | > **ℹ️ Notice:** 20 | > 21 | > Visit https://github.com/Maplemx/Agently if you want to learn more about the **_Agently_** AI application development framework. 22 | 23 | ## How to Use 24 | 25 | ### Step 1: Clone this repo 26 | 27 | Run this command in shell: 28 | 29 | ```shell 30 | git clone git@github.com:AgentEra/Agently-Daily-News-Collector.git 31 | ``` 32 | 33 | ### Step 2: Edit settings YAML file 34 | 35 | You can find the [`SETTINGS.yaml`](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/SETTINGS.yaml) file in the project dir. 36 | 37 | Input your model's API key and change other settings as you wish. 38 | 39 | If you want to use another model, you can read [this document](https://github.com/Maplemx/Agently/blob/main/docs/guidebook/application_development_handbook.ipynb) or [this Agently official website page](http://agently.tech/features/model_request.html) to see how to set the settings. 
40 | 41 | ### Step 3: Start 42 | 43 | Because this project is a Python project, you need to install Python first. You can find installation instruction on [Python official website](https://www.python.org/). 44 | 45 | At the first time to run this project, you should use this command in shell to download and install dependency packages: 46 | 47 | ```shell 48 | pip install -r path/to/project/requirements.txt 49 | ``` 50 | 51 | Wait until the dependency packages are installed then use this command in shell to start the generation process. 52 | 53 | ```shell 54 | python path/to/project/app.py 55 | ``` 56 | 57 | You will see a tip `[Please input the topic of your daily news collection]:`. 58 | 59 | Input your topic idea about the field of news that you want to collect, then you're good to go. 60 | 61 | During the process, there'll be some logs printed to shell to present what tasks are done like this: 62 | 63 | ```shell 64 | 2024-05-02 22:44:27,347 [INFO] [Outline Generated] {'report_title': "Today's news about AI Models Appliaction", 'column_list': [{'column_title': 'Latest News', 'column_requirement': 'The content is related to AI Models Appliaction, and the time is within 24 hours', 'search_keywords': 'AI Models Appliaction news latest'}, {'column_title': 'Hot News', 'column_requirement': 'The content is related to AI Models Appliaction, and the interaction is high', 'search_keywords': 'AI Models Appliaction news hot'}, {'column_title': 'Related News', 'column_requirement': 'The content is related to AI Models Appliaction, but not news', 'search_keywords': 'AI Models Appliaction report'}]} 65 | 2024-05-02 22:44:32,352 [INFO] [Start Generate Column] Latest News 66 | 2024-05-02 22:44:34,132 [INFO] [Search News Count] 8 67 | 2024-05-02 22:44:46,062 [INFO] [Picked News Count] 2 68 | 2024-05-02 22:44:46,062 [INFO] [Summarzing] With Support from AWS, Yseop Develops a Unique Generative AI Application for Regulatory Document Generation Across BioPharma 69 | 2024-05-02 
22:44:52,579 [INFO] [Summarzing] Success 70 | 2024-05-02 22:44:57,580 [INFO] [Summarzing] Over 500 AI models are now optimised for Core Ultra processors, says Intel 71 | 2024-05-02 22:45:02,130 [INFO] [Summarzing] Success 72 | 2024-05-02 22:45:19,475 [INFO] [Column Data Prepared] {'title': 'Latest News', 'prologue': 'Stay up-to-date with the latest advancements in AI technology with these news updates: [Yseop Partners with AWS to Develop Generative AI for BioPharma](https://finance.yahoo.com/news/support-aws-yseop-develops-unique-130000171.html) and [Intel Optimizes Over 500 AI Models for Core Ultra Processors](https://www.business-standard.com/technology/tech-news/over-500-ai-models-are-now-optimised-for-core-ultra-processors-says-intel-124050200482_1.html).', 'news_list': [{'url': 'https://finance.yahoo.com/news/support-aws-yseop-develops-unique-130000171.html', 'title': 'With Support from AWS, Yseop Develops a Unique Generative AI Application for Regulatory Document Generation Across BioPharma', 'summary': "Yseop utilizes AWS to create a new Generative AI application for the Biopharma sector. This application leverages AWS for its scalability and security, and it allows Biopharma companies to bring pharmaceuticals and vaccines to the market more quickly. Yseop's platform integrates LLM models for generating scientific content while meeting the security standards of the pharmaceutical industry.", 'recommend_comment': 'AWS partnership helps Yseop develop an innovative Generative AI application for the BioPharma industry, enabling companies to expedite the delivery of pharmaceuticals and vaccines to market. 
The integration of LLM models and compliance with stringent pharmaceutical industry security standards make this a valuable solution for BioPharma companies.'}, {'url': 'https://www.business-standard.com/technology/tech-news/over-500-ai-models-are-now-optimised-for-core-ultra-processors-says-intel-124050200482_1.html', 'title': 'Over 500 AI models are now optimised for Core Ultra processors, says Intel', 'summary': 'Intel stated over 500 AI models are optimized for Core Ultra processors. These models are accessible from well-known sources like OpenVINO Model Zoo, Hugging Face, ONNX Model Zoo, and PyTorch.', 'recommend_comment': "Intel's optimization of over 500 AI models for Core Ultra processors provides access to a vast selection of pre-trained models from reputable sources. This optimization enhances the performance and efficiency of AI applications, making it easier for developers to deploy AI solutions on Intel-based hardware."}]} 73 | ``` 74 | 75 | Whole process will take some time, so just relax and have some rest☕️. 76 | 77 | ### Step 4: Get your news collection markdown file! 78 | 79 | When the process is done finally, you will see a tip like this with markdown text that generated printed on screen: 80 | 81 | ```shell 82 | 2024-05-02 21:57:20,521 [INFO] [Markdown Generated] 83 | ``` 84 | 85 | Then you can find a markdown file named ` .md` in your project dir. 86 | 87 | Enjoy it! 😄 88 | 89 | --- 90 | 91 | ## Mainly Dependencies 92 | 93 | - **Agently AI Development Framework**: https://github.com/Maplemx/Agently | https://pypi.org/project/Agently/ 94 | - **duckduckgo-search**: https://pypi.org/project/duckduckgo-search/ 95 | - **BeautifulSoup4**: https://pypi.org/project/beautifulsoup4/ 96 | - **PyYAM**L: https://pypi.org/project/pyyaml/ 97 | 98 | --- 99 | 100 | Please ⭐️ this repo and [Agently](https://github.com/Maplemx/Agently) main repo if you like it! Thank you very much! 
101 | 102 | > 💡 Ideas / Bug Report: [Report Issues Here](https://github.com/AgentEra/Agently-Daily-News-Collector/issues) 103 | > 104 | > 📧 Email Us: [developer@agently.cn](mailto:developer@agently.cn) 105 | > 106 | > 👾 Discord Group: 107 | > 108 | > [Click Here to Join](https://discord.gg/4HnarMBpYT) or Scan the QR Code Down Below 109 | > 110 | > image 111 | > 112 | > 💬 WeChat Group(加入微信群): 113 | > 114 | > [Click Here to Apply](https://doc.weixin.qq.com/forms/AIoA8gcHAFMAScAhgZQABIlW6tV3l7QQf) or Scan the QR Code Down Below 115 | > 116 | > image -------------------------------------------------------------------------------- /README_CN.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |

Agently-Daily-News-Collector

4 | 5 |

Agently 新闻汇总报告生成器

6 | 7 | English Introduction | 中文说明 8 | 9 |
10 | 11 | **Agently新闻汇总报告生成器**是一个基于[**_Agently_** AI应用开发框架](https://github.com/Maplemx/Agently)开发的应用项目。本项目构建了**基于大语言模型驱动的全自动工作流**,能够根据用户输入的主题关键词,自动完成新闻汇总报告的结构设计、栏目组稿(含新闻检索、筛查、总结、栏目信息撰写)及报告MarkDown格式文件的输出全过程。同时,本项目**完全开源**,欢迎开发者们通过Fork->PR的方式共同优化。 12 | 13 | 新闻汇总报告的样例可参考: 14 | 15 | `MarkDown文件` [Lastest Updated on AI Models 2024-05-02](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/examples/Latest%20Updates%20on%20AI%20Models2024-05-02.md) 16 | 17 | `PDF文件` [Lastest Updated on AI Models 2024-05-02](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/examples/Latest%20Updates%20on%20AI%20Models%202024-05-02.pdf) 18 | 19 | > 如果您希望进一步了解[**_Agently_** AI应用开发框架](https://github.com/Maplemx/Agently),您可以访问框架的[主仓库地址](https://github.com/Maplemx/Agently)或是[中文官网](http://Agently.cn)阅读更多相关信息,框架提供了丰富的教程和案例,帮助您逐步上手。 20 | 21 | ## 如何使用 22 | 23 | ### 第一步:将本仓库Clone到本地 24 | 25 | 在您的开发目录中使用以下Shell脚本指令: 26 | 27 | ```shell 28 | git clone git@github.com:AgentEra/Agently-Daily-News-Collector.git 29 | ``` 30 | 31 | ### 第二步:修改SETTINGS.yaml设置文件 32 | 33 | 您可以在Clone到本地的项目文件夹中找到[`SETTINGS.yaml`](https://github.com/AgentEra/Agently-Daily-News-Collector/blob/main/SETTINGS.yaml)这个文件,再根据您的需要修改其中的设置项即可。 34 | 35 | 下面是具体的设置项说明: 36 | 37 | ```yaml 38 | # Debug Settings 39 | IS_DEBUG: false # 如果此项为true,将会输出更多执行过程信息,包括搜索和模型请求的明细信息 40 | # Proxy Settings 41 | PROXY: http://127.0.0.1:7890 # 项目中的搜索和模型请求可能会需要使用前向代理,可以通过此项设置代理信息 42 | # Model Settings 43 | MODEL_PROVIDER: OAIClient #默认使用OpenAI格式的兼容客户端,此客户端能够适配OpenAI以及各类兼容OpenAI格式的本地模型 44 | MODEL_URL: http://base_url_path # 如果您需要修改Base URL,使用此项进行设置 45 | MODEL_AUTH: 46 | api_key: "" # 在这里输入鉴权用的API-Key信息 47 | MODEL_OPTIONS: # 在这里指定模型需要的其他参数,如指定具体的模型,或是调整temperature 48 | model: gpt-3.5-turbo 49 | temperature: 0.8 50 | # Application Settings 51 | MAX_COLUMN_NUM: 3 # 在这里设置汇总报告结构中的专栏数量 52 | OUTPUT_LANGUAGE: Chinese # 在这里设置汇总报告的输出语种,默认为英语,您可能需要手动改成中文 53 | MAX_SEARCH_RESULTS: 8 # 在这里设置每个栏目搜索的最大结果数量 54 | # 
注意,如果数量设置过大,可能会导致超出模型的处理窗口大小,请根据模型具体情况设置 55 | SLEEP_TIME: 5 # 在这里设置每次模型请求后的等待时间,以防止频繁请求导致模型拒绝访问 56 | ``` 57 | 58 | 如果您想要了解切换其他模型的更多细节,可以阅读Agently官方网站关于[模型设置的说明页面](http://agently.tech/features/model_request.html)。 59 | 60 | ### 第三步:启动任务 61 | 62 | 因为本项目为Python项目,您需要在本地安装Python环境。您可以在[Python官方网站](https://www.python.org/)找到适合您的安装方法。 63 | 64 | 然后,在您的项目目录下使用以下Shell脚本指令更新项目依赖包: 65 | 66 | ```shell 67 | pip install -r requirements.txt 68 | ``` 69 | 70 | 依赖包安装完毕后,通过以下Shell脚本指令即可启动: 71 | 72 | ```shell 73 | python app.py 74 | ``` 75 | 76 | 随后您会看到一个提示:`[Please input the topic of your daily news collection]:`。 77 | 78 | 根据提示输入您想要汇总的新闻领域主题关键词,或是用一句话描述您想要生成什么样的新闻汇总报告,然后任务就会开始自动运行了。在这里,您可以输入任何语种的内容,但生成内容的语种会和您在第二步中的设置的语种要求相同。 79 | 80 | 接下来您就可以等待运行的结果了,整个过程大约需要5-8分钟。 81 | 82 | 在运行的过程中,您会看到类似下面展示的输出日志,这些日志将帮助您了解当前在处理的任务,以及运行的关键进展情况: 83 | 84 | ```shell 85 | 2024-05-02 22:44:27,347 [INFO] [Outline Generated] {'report_title': "Today's news about AI Models Appliaction", 'column_list': [{'column_title': 'Latest News', 'column_requirement': 'The content is related to AI Models Appliaction, and the time is within 24 hours', 'search_keywords': 'AI Models Appliaction news latest'}, {'column_title': 'Hot News', 'column_requirement': 'The content is related to AI Models Appliaction, and the interaction is high', 'search_keywords': 'AI Models Appliaction news hot'}, {'column_title': 'Related News', 'column_requirement': 'The content is related to AI Models Appliaction, but not news', 'search_keywords': 'AI Models Appliaction report'}]} 86 | 2024-05-02 22:44:32,352 [INFO] [Start Generate Column] Latest News 87 | 2024-05-02 22:44:34,132 [INFO] [Search News Count] 8 88 | 2024-05-02 22:44:46,062 [INFO] [Picked News Count] 2 89 | 2024-05-02 22:44:46,062 [INFO] [Summarzing] With Support from AWS, Yseop Develops a Unique Generative AI Application for Regulatory Document Generation Across BioPharma 90 | 2024-05-02 22:44:52,579 [INFO] [Summarzing] Success 91 | 2024-05-02 22:44:57,580 [INFO] [Summarzing] 
Over 500 AI models are now optimised for Core Ultra processors, says Intel 92 | 2024-05-02 22:45:02,130 [INFO] [Summarzing] Success 93 | 2024-05-02 22:45:19,475 [INFO] [Column Data Prepared] {'title': 'Latest News', 'prologue': 'Stay up-to-date with the latest advancements in AI technology with these news updates: [Yseop Partners with AWS to Develop Generative AI for BioPharma](https://finance.yahoo.com/news/support-aws-yseop-develops-unique-130000171.html) and [Intel Optimizes Over 500 AI Models for Core Ultra Processors](https://www.business-standard.com/technology/tech-news/over-500-ai-models-are-now-optimised-for-core-ultra-processors-says-intel-124050200482_1.html).', 'news_list': [{'url': 'https://finance.yahoo.com/news/support-aws-yseop-develops-unique-130000171.html', 'title': 'With Support from AWS, Yseop Develops a Unique Generative AI Application for Regulatory Document Generation Across BioPharma', 'summary': "Yseop utilizes AWS to create a new Generative AI application for the Biopharma sector. This application leverages AWS for its scalability and security, and it allows Biopharma companies to bring pharmaceuticals and vaccines to the market more quickly. Yseop's platform integrates LLM models for generating scientific content while meeting the security standards of the pharmaceutical industry.", 'recommend_comment': 'AWS partnership helps Yseop develop an innovative Generative AI application for the BioPharma industry, enabling companies to expedite the delivery of pharmaceuticals and vaccines to market. 
The integration of LLM models and compliance with stringent pharmaceutical industry security standards make this a valuable solution for BioPharma companies.'}, {'url': 'https://www.business-standard.com/technology/tech-news/over-500-ai-models-are-now-optimised-for-core-ultra-processors-says-intel-124050200482_1.html', 'title': 'Over 500 AI models are now optimised for Core Ultra processors, says Intel', 'summary': 'Intel stated over 500 AI models are optimized for Core Ultra processors. These models are accessible from well-known sources like OpenVINO Model Zoo, Hugging Face, ONNX Model Zoo, and PyTorch.', 'recommend_comment': "Intel's optimization of over 500 AI models for Core Ultra processors provides access to a vast selection of pre-trained models from reputable sources. This optimization enhances the performance and efficiency of AI applications, making it easier for developers to deploy AI solutions on Intel-based hardware."}]} 94 | ``` 95 | ### 第四步:得到一份新鲜出炉的新闻汇总报告📰! 96 | 97 | 在整个处理过程结束时,您将会看到类似下方的提示,并可以看到完整的报告MarkDown格式结果被输出到屏幕上: 98 | 99 | ```shell 100 | 2024-05-02 21:57:20,521 [INFO] [Markdown Generated] 101 | ``` 102 | 103 | 同时,您也可以在您的项目文件夹中找到一份命名格式为`<汇总报告名称> <生成日期>.md`的文件。 104 | 105 | 大功告成!🎉 106 | 107 | --- 108 | 109 | ## 常见问题(FAQ) 110 | 111 | **1. 为什么提示请求超时(如:operation timed out,504错误等)或是出现搜索结果为0的情况?** 112 | 113 | 通常情况下是因为无法请求外网造成的,需要通过配置Proxy代理解决,配置方法请参考上文中SETTINGS.yaml的PROXY配置项 114 | 115 | **2. 我用的国内模型,为什么还需要配置Proxy?** 116 | 117 | 本项目默认使用的搜索工具是外网的搜索工具,所以即使模型本身不存在请求访问限制的问题,也需要使用Proxy代理。 118 | 119 | **3. 我机子上开着VPN或者其他代理软件呢,为什么还需要配置Proxy?** 120 | 121 | 因为大部分情况下,在代理软件没有开启特定的全局系统请求代理模式(如Clash的TUN模式)前,Python脚本在运行时,不会使用代理软件在本地启动的代理服务接口来发起网络请求,而是会直接对目标网址进行请求。因此,需要通过配置Proxy来帮助Python脚本调用代理软件在本地启动的代理服务接口。 122 | 123 | 以Clash客户端为例,你可以从这个位置找到代理服务的端口: 124 | 125 | image 126 | 127 | 根据上图,本地的Proxy服务地址应该为http://127.0.0.1:7890 128 | 129 | **4. 
我自己有更好用的搜索/浏览工具,我能不能干脆直接修改掉默认的搜索或者浏览工具?** 130 | 131 | 当然可以,我们在项目中对不同的模块都做了解耦,要修改搜索工具,只需要修改/workflows/tools/search.py文件即可,而要修改浏览工具,只需要修改/workflows/tools/browse.py文件即可。 132 | 133 | 我们也非常欢迎您将自己修改优化的好用的工具PR给项目,分享给更多的人使用,这类贡献者我们还会在项目首页进行署名感谢! 134 | 135 | **5. 都说到这里了,我还能修改/定制/优化哪些地方?** 136 | 137 | 您可以修改的内容包括: 138 | 139 | 1. /workflows 文件夹中的处理工作流程,比如,添加一个愚人节处理工作流,来生成洋葱新闻 140 | 2. /workflows/tools 文件夹中的搜索和浏览工具,或是结合新增处理工作流,添加更多你觉得有用的工具 141 | 3. /prompts 文件夹中的所有Prompt YAML文件,这些文件都非常易读易修改,欢迎您修改出更好的Prompt,或是简单地将英文Prompt调整为中文 142 | 143 | --- 144 | 145 | ## 主要依赖说明 146 | 147 | - Agently AI应用开发框架:https://github.com/Maplemx/Agently | https://pypi.org/project/Agently/ | http://Agently.cn 148 | - duckduckgo-search: https://pypi.org/project/duckduckgo-search/ 149 | - BeautifulSoup4: https://pypi.org/project/beautifulsoup4/ 150 | - PyYAML: https://pypi.org/project/pyyaml/ 151 | 152 | --- 153 | 154 | 如果您喜欢这个项目,请为本项目以及[Agently框架主仓库](https://github.com/Maplemx/Agently)点亮⭐️。 155 | 156 | 如果您希望了解更多关于本项目的线上产品化版本信息,欢迎通过下面的方式加入我们的讨论群,我们将在近期组织线上产品化版本的测试。 157 | 158 | > 💡 意见反馈/Bug提交: [Report Issues Here](https://github.com/AgentEra/Agently-Daily-News-Collector/issues) 159 | > 160 | > 📧 联系我们: [developer@agently.cn](mailto:developer@agently.cn) 161 | > 162 | > 💬 加入微信讨论群: 163 | > 164 | > [点击这里填写申请表](https://doc.weixin.qq.com/forms/AIoA8gcHAFMAScAhgZQABIlW6tV3l7QQf)或扫描下方二维码申请入群 165 | > 166 | > image 167 | -------------------------------------------------------------------------------- /SETTINGS.yaml: -------------------------------------------------------------------------------- 1 | # Debug Settings 2 | IS_DEBUG: false 3 | # Proxy Settings 4 | #MODEL_PROXY: http://127.0.0.1:7890 5 | #TOOL_PROXY: http://127.0.0.1:7890 6 | # Model Settings 7 | MODEL_PROVIDER: OAIClient 8 | #MODEL_URL: 9 | MODEL_AUTH: 10 | api_key: "" 11 | MODEL_OPTIONS: 12 | model: gpt-3.5-turbo 13 | # Application Settings 14 | MAX_COLUMN_NUM: 3 15 | OUTPUT_LANGUAGE: English 16 | MAX_SEARCH_RESULTS: 8 17 | SLEEP_TIME: 5 18 | # News 
time limit: 'd' as day,'w' as week,'m' as month 19 | NEWS_TIME_LIMIT: d 20 | # Outline Settings 21 | USE_CUSTOMIZE_OUTLINE: false 22 | CUSTOMIZE_OUTLINE: 23 | report_title: "Today's News about Large Model Applications" 24 | column_list: 25 | - column_title: New Apps 26 | column_requirement: Looking for those applications powered by large models which announced recently 27 | search_keywords: large model application announce this week 28 | - column_title: Hot Apps 29 | column_requirement: Looking for those applications powered by large models which are most popular or are discussed most 30 | search_keywords: large model application popular hot 31 | - column_title: Fun Apps 32 | column_requirement: Looking for those applications powered by large models which are funny or inspirational 33 | search_keywords: large model application cool fun inspire -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import Agently 2 | import utils.yaml_reader as yaml 3 | from utils.logger import Logger 4 | from workflows import main_workflow 5 | from utils.path import root_path 6 | 7 | # Settings and Logger 8 | SETTINGS = yaml.read("./SETTINGS.yaml") 9 | logger = Logger(console_level = "DEBUG" if SETTINGS.IS_DEBUG else "INFO") 10 | 11 | # Proxy 12 | model_proxy = ( 13 | SETTINGS.MODEL_PROXY 14 | if hasattr(SETTINGS, "MODEL_PROXY") 15 | else 16 | ( 17 | SETTINGS.PROXY 18 | if hasattr(SETTINGS, "PROXY") 19 | else None 20 | ) 21 | ) 22 | 23 | # Agent Factory 24 | agent_factory = ( 25 | Agently.AgentFactory(is_debug=SETTINGS.IS_DEBUG) 26 | .set_settings("current_model", SETTINGS.MODEL_PROVIDER) 27 | .set_settings(f"model.{ SETTINGS.MODEL_PROVIDER }.auth", SETTINGS.MODEL_AUTH) 28 | .set_settings(f"model.{ SETTINGS.MODEL_PROVIDER }.url", SETTINGS.MODEL_URL if hasattr(SETTINGS, "MODEL_URL") else None) 29 | .set_settings(f"model.{ SETTINGS.MODEL_PROVIDER }.options", 
SETTINGS.MODEL_OPTIONS if hasattr(SETTINGS, "MODEL_OPTIONS") else {}) 30 | .set_settings("proxy", model_proxy) 31 | ) 32 | 33 | # Start Workflow 34 | main_workflow.start( 35 | agent_factory=agent_factory, 36 | SETTINGS=SETTINGS, 37 | root_path=root_path, 38 | logger=logger, 39 | ) -------------------------------------------------------------------------------- /examples/Latest Updates on AI Models 2024-05-02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentEra/Agently-Daily-News-Collector/bec8042823ccd6fead496efad480ec4bb56d8b98/examples/Latest Updates on AI Models 2024-05-02.pdf -------------------------------------------------------------------------------- /examples/Latest Updates on AI Models2024-05-02.md: -------------------------------------------------------------------------------- 1 | # Latest Updates on AI Models 2 | 3 | > 2024-05-02 Thursday 4 | 5 | ## Industry Trends 6 | 7 | ### PROLOGUE 8 | 9 | > The selected news articles are related to current trends and developments in the field of AI models. They cover various aspects of AI implementation across industries, from real-time pharma news delivery and control room optimization to accelerated adoption of AI and competitive dynamics in the AI sector. The articles provide insights into how AI is shaping operational efficiency, innovation, decision-making, and industry-specific tasks, reflecting the increasing utilization of AI technology for growth and productivity enhancement. 10 | 11 | ### NEWS LIST 12 | 13 | - [AppliedXL Collaborates with Bloomberg to Provide AI-Powered, Real-Time Pharma News on the Bloomberg Terminal](https://www.lelezard.com/en/news-21360719.html) 14 | 15 | - `[summray]` AppliedXL collaborates with Bloomberg to provide AI-powered, real-time pharma news on the Bloomberg Terminal. The collaboration aims to deliver key insights to help users stay ahead of catalyst events in the pharmaceutical industry. 
AppliedXL's AI technology analyzes live public data to uncover signals and trends, which are then distilled into early news stories included in real-time news feeds for early signal detection and market analysis. The collaboration focuses on the life sciences and biopharma space, alerting users to irregularities in clinical trial progressions and other market-moving events. AppliedXL's AI technology combines machine learning and human expertise to provide precise and contextualized information efficiently. 16 | - `[comment]` This news article discusses how AppliedXL collaborates with Bloomberg to provide AI-powered, real-time pharma news on the Bloomberg Terminal. The collaboration aims to deliver key insights to help users stay ahead of catalyst events in the pharmaceutical industry, showcasing the use of AI in delivering real-time industry updates. 17 | 18 | - [AI for control rooms](https://www.symmetrymagazine.org/article/ai-for-control-rooms?language_content_entity=und) 19 | 20 | - `[summray]` AI is being utilized in control rooms within the fields of particle physics and astrophysics to assist with complex tasks. From machine learning algorithms helping to keep particle beams flowing in accelerators to optimizing telescope scheduling for studying galaxies, AI is proving to be a valuable tool for scientists. Additionally, AI is being developed to aid electric grid operators in managing the increasing number of energy resources connecting to the grid. The goal is not to replace human operators but to enhance decision-making by presenting them with the best tool options immediately and learning from human feedback. 21 | - `[comment]` The article highlights the use of AI in control rooms within particle physics and astrophysics, assisting with complex tasks. It showcases how AI is enhancing decision-making and presenting the best tool options immediately to human operators, aligning with the current trend of utilizing AI to optimize processes. 
22 | 23 | - [Six AI industry trends we're tracking in 2024 (and beyond)](https://diginomica.com/six-ai-industry-trends-were-tracking-2024-and-beyond) 24 | 25 | - `[summray]` In 2024, the adoption of AI across industries has accelerated significantly, with projections indicating that by 2040, 1.3 million businesses will be utilizing AI to drive innovation. Various sectors such as the telecom industry, manufacturing, energy, utilities, construction, asset-centric service providers, and defense companies are leveraging AI and automation to enhance operational efficiency, drive performance, accelerate evolution, alleviate challenges, transform fleet management, and strengthen cybersecurity. As organizations invest in advanced technology like AI to optimize processes and automate industry-specific tasks, the potential for growth and productivity enhancement is vast, signaling a shift towards more resilient and digitally transformed operations. 26 | - `[comment]` The content discusses the accelerated adoption of AI across various industries, enhancing operational efficiency and driving innovation. It reflects the trend of organizations investing in advanced technology like AI to optimize processes and automate industry-specific tasks for enhanced growth and productivity. 27 | 28 | - [Microsoft's Fear Of Google's AI Dominance Led To OpenAI Investment, Internal Email Reveals: 'We're Multiple Years Behind The Competition'](https://www.benzinga.com/news/24/05/38582364/microsofts-fear-of-googles-ai-dominance-led-to-openai-investment-internal-email-reveals-were-multipl) 29 | 30 | - `[summray]` An internal email from Microsoft Corp. revealed that the company's investment in OpenAI was motivated by the fear of falling behind Google in AI capabilities. Microsoft's chief technology officer Kevin Scott expressed concerns about the lack of machine learning scale, infrastructure, and development speed compared to Google and OpenAI. 
The email highlighted the intense competition in the AI space, with Microsoft investing over $13 billion in OpenAI to enhance various services. The email sheds light on the rivalry between Microsoft and Google in the AI sector, with Google introducing Bard (now Gemini) to compete with OpenAI's ChatGPT, facing some challenges during the launch. This news article reflects the current trends and developments in AI models and the competitive landscape in the industry. 31 | - `[comment]` The news reveals Microsoft's investment in OpenAI motivated by the fear of falling behind Google in AI capabilities. It sheds light on the intense competition in the AI space, showcasing the current trends and developments in AI models and the competitive landscape in the industry. 32 | 33 | - [Q1 2024 Cognizant Technology Solutions Corp Earnings Call](https://finance.yahoo.com/news/q1-2024-cognizant-technology-solutions-123608449.html) 34 | 35 | - `[summray]` Cognizant Technology Solutions reported on their Q1 2024 Earnings Call, highlighting progress against strategic priorities in a challenging demand environment. They delivered revenue growth exceeding guidance, expanded adjusted operating margin, and noted improvements in voluntary attrition. The company saw sequential growth in Health Sciences and Communications, Media and Technology, with declines in Financial Services. The demand environment remains uncertain, shifting client spending to cost-saving projects. Cognizant focuses on innovation, including AI, cloud, and digital technologies. They mentioned partnerships with Microsoft, Google Cloud, and NVIDIA for AI initiatives. The company emphasized the importance of collaboration, cited recognition for innovation, and highlighted their Bluebolt grassroots initiative. Overall, they aim to increase revenue growth, become an employer of choice, and simplify operations. 
36 | - `[comment]` Cognizant Technology Solutions' Q1 2024 Earnings Call highlights their focus on innovation, including AI, cloud, and digital technologies. The partnerships with Microsoft, Google Cloud, and NVIDIA for AI initiatives showcase the ongoing trend of companies leveraging AI for growth and becoming employers of choice. 37 | 38 | ## Innovations and Research 39 | 40 | ### PROLOGUE 41 | 42 | > Recent innovations and breakthroughs in the AI models domain are highlighted in the selected news articles. China's advancements in AI technologies, including the SenseNova 5.0 large language model and Vidu text-to-video AI tool, demonstrate the country's commitment to cutting-edge AI developments. Additionally, the rise of generative AI is emphasized as a key trend for driving innovation and organizational growth. Furthermore, a team of researchers has outlined guidelines for the responsible use of machine learning in science, aiming to enhance credibility and reproducibility in research. Explore more about these advancements and guidelines in the following articles: [China's AI Advances](https://swarajyamag.com/technology/chinas-ai-advances-that-are-flying-under-the-radar), [Generative AI's Exponential Potential](https://www.forbes.com/sites/forbestechcouncil/2024/05/02/innovators-should-seize-on-generative-ais-exponential-potential/), [Science's AI Problem](https://www.sciencedaily.com/releases/2024/05/240501153055.htm) 43 | 44 | ### NEWS LIST 45 | 46 | - [China's AI Advances That Are Flying Under The Radar](https://swarajyamag.com/technology/chinas-ai-advances-that-are-flying-under-the-radar) 47 | 48 | - `[summray]` China is making significant advancements in Artificial Intelligence (AI), with recent releases rivalling those in the United States. SenseTime unveiled the SenseNova 5.0 large language model (LLM) with impressive capabilities in knowledge, mathematics, reasoning, and coding. 
The model surpasses OpenAI's GPT-4 Turbo and tops various multimodal benchmarks. Another innovation is Vidu, a text-to-video AI tool that can generate 16-second videos based on simple text prompts. Additionally, Stardust Intelligence introduced the Astribot S1 humanoid robot, capable of performing household chores and imitating human movements. China is demonstrating seriousness in its AI ambitions, with over 40 approved AI models for public use and a vision to empower billions of people with AI robot assistants. 49 | - `[comment]` China's advancements in AI, such as the SenseNova 5.0 large language model and Vidu text-to-video AI tool, showcase the country's commitment to innovative technologies in the AI domain. 50 | 51 | - [Innovators Should Seize On Generative AI's Exponential Potential](https://www.forbes.com/sites/forbestechcouncil/2024/05/02/innovators-should-seize-on-generative-ais-exponential-potential/) 52 | 53 | - `[summray]` Generative AI is identified as a significant trend in the tech industry that necessitates rapid adaptation. The market for generative AI is projected to grow rapidly, with organizations investing in the technology to drive innovation. McKinsey details how generative AI can accelerate organizational growth by rapidly processing information, writing code for self-improvement, and enhancing competitive edge. By utilizing generative AI tools tailored for each phase of innovation, organizations can revamp their innovation processes to tap into the technology's potential. The importance of experimentation, prototyping, and scaling is emphasized, with generative AI offering various tools to aid in these processes. The democratization of innovation across employees and the augmentation of emerging technologies hold promise for accelerating organization's adaptability and competitiveness in leveraging generative AI for innovation. 
54 | - `[comment]` Generative AI is a crucial trend in tech, with potential to drive rapid innovation and organizational growth. Organizations should leverage generative AI tools for revolutionizing their innovation processes. 55 | 56 | - [Science has an AI problem: This group says they can fix it](https://www.sciencedaily.com/releases/2024/05/240501153055.htm) 57 | 58 | - `[summray]` An interdisciplinary team of 19 researchers, led by Princeton University computer scientists Arvind Narayanan and Sayash Kapoor, has published guidelines for the responsible use of machine learning in science to address the credibility crisis in research caused by deep flaws in machine learning methods. The guidelines focus on transparency and integrity, calling for detailed descriptions of machine learning models, code, data, hardware specifications, experimental design, and project goals. The aim is to ensure reproducibility of results, validate claims, and accelerate scientific progress by improving the quality of published papers. 59 | - `[comment]` A team of researchers has provided guidelines for responsible use of machine learning in science to address credibility issues. Transparency and integrity in machine learning models are crucial for reproducibility of results and scientific progress. 60 | 61 | ## Future Outlook 62 | 63 | ### PROLOGUE 64 | 65 | > The following article delves into the future prospects, challenges, and potential advancements of AI models in the context of business operations and employee dynamics. It explores the impact of technological advancements, particularly Artificial Intelligence (AI), on businesses and employees, focusing on layoffs resulting from automation. 
For more information, you can visit the article [Layoffs in the wake of technological advancements: The inherent benefits for businesses and employees](https://www.ghanaweb.com/GhanaHomePage/business/Layoffs-in-the-wake-of-technological-advancements-The-inherent-benefits-for-businesses-and-employees-1928854). 66 | 67 | ### NEWS LIST 68 | 69 | - [Layoffs in the wake of technological advancements: The inherent benefits for businesses and employees](https://www.ghanaweb.com/GhanaHomePage/business/Layoffs-in-the-wake-of-technological-advancements-The-inherent-benefits-for-businesses-and-employees-1928854) 70 | 71 | - `[summray]` The article discusses the impact of technological advancements, particularly Artificial Intelligence (AI), on businesses and employees, focusing on layoffs as a result of automation. It highlights the benefits and challenges of AI in the workplace, such as increased productivity, job displacement, and layoff exercises. The causes of layoffs, including economic downturns, technological advancements, restructuring, shifting consumer preferences, and cost-saving measures, are explored, along with the opportunities they bring for businesses. Additionally, the article outlines the benefits of layoffs for employees, such as severance packages, career reevaluation, increased market value, networking opportunities, personal growth, and entrepreneurial opportunities. 72 | - `[comment]` The article provides insights into the impact of technological advancements, specifically AI, on businesses and employees, highlighting the challenges and benefits associated with layoffs. It is a relevant read for understanding the future prospects of AI models in the workplace. 
73 | 74 | 75 | 76 | --- 77 | 78 | Powered by [Agently AI Application Development Framework & Agently Workflow](https://github.com/Maplemx/Agently) 79 | 80 | Model Information:OAIClient - {'model': 'gpt-3.5-turbo'} 81 | 82 | **_Agently_** [Guidebook](https://github.com/Maplemx/Agently/blob/main/docs/guidebook) 83 | 84 | [Apply Developers WeChat Group](https://doc.weixin.qq.com/forms/AIoA8gcHAFMAScAhgZQABIlW6tV3l7QQf) or Scan QR Code to Apply. 85 | 86 | image -------------------------------------------------------------------------------- /logs/Agently_daily_news_collector.log: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /prompts/create_outline.yaml: -------------------------------------------------------------------------------- 1 | input: 2 | topic: ${topic} 3 | news_time_limit: ${news_time_limit} 4 | instruct: 5 | task: prepare news collection outline according {input.topic} and {input.news_time_limit} 6 | output language: ${language} 7 | output: 8 | report_title: 9 | $type: str 10 | $desc: generate a title for this news collection like "news about sports", "news about finance" 11 | column_list: 12 | $type: 13 | - column_title: 14 | $type: str 15 | $desc: title of this column even if this is the only one column 16 | column_requirement: 17 | $type: str 18 | $desc: describe recheck standard about the contents in this column to make sure all contents are aimed at the requirement of {input}'s topic 19 | search_keywords: 20 | $type: str 21 | $desc: search keywords for this column split by space. make sure the field keyword about {input} is included in keywords. 
22 | $desc: the number of columns <= ${max_column_num} -------------------------------------------------------------------------------- /prompts/pick_news.yaml: -------------------------------------------------------------------------------- 1 | input: ${column_news} 2 | instruct: 3 | news select rules: 4 | - ${column_requirement} 5 | - if several news are similar, just retain the one with most famous source and output {can_use} as false for others 6 | output: 7 | - id: 8 | $type: int 9 | $desc: value from {input.[].id} 10 | can_use: 11 | $type: bool 12 | $desc: judge if {input.brief} can be used according {instruct} 13 | recommend_comment: 14 | $type: str 15 | $desc: provide your recommend comment if {can_use} == true, or just output null -------------------------------------------------------------------------------- /prompts/summarize.yaml: -------------------------------------------------------------------------------- 1 | input: ${news_content} 2 | info: 3 | column requirement: ${column_requirement} 4 | news title: ${news_title} 5 | instruct: 6 | output language: ${language} 7 | summary rule: 8 | - find and summarize the main content part of the news content which is collected from webpage 9 | - summary focus on relative content to {column requirement} and {news title} 10 | - summary in one paragraph without linebreak 11 | output: 12 | can_summarize: 13 | $type: bool 14 | $desc: judge if {input} has enough relative content to be summarized 15 | translated_title: 16 | $type: str 17 | $desc: translate {input.news title} into ${language} 18 | summary: 19 | $type: str 20 | $desc: summarize {input} according {info} and {instruct} if {can_summarize} == true, or output null 21 | -------------------------------------------------------------------------------- /prompts/write_column.yaml: -------------------------------------------------------------------------------- 1 | input: ${slimmed_news} 2 | info: 3 | column requirement: ${column_requirement} 4 | instruct: 5 | 
news select rules: 5 | - if there're several similar content news, only select one of them into {news_list} 7 | - all news selected must follow or be relative to {column requirement} 8 | output language: ${language} 9 | output: 10 | news_list: 11 | $type: 12 | - id: 13 | $type: int 14 | $desc: value from {input.[].id} 15 | recommend_comment: 16 | $type: str 17 | $desc: provide your recommend comment of this news according your role and {column requirement} 18 | $desc: select news into column list according {news select rules} from {input} 19 | prologue: 20 | $type: str 21 | $desc: write a prologue for readers according {news_list} and {news select rules}, you can use [](news url) to mark key information -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Agently==3.3.4.7 2 | PyYAML==6.0.1 3 | duckduckgo_search>=5.3.0 4 | beautifulsoup4>=4.12.3 5 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentEra/Agently-Daily-News-Collector/bec8042823ccd6fead496efad480ec4bb56d8b98/utils/__init__.py -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | logging.getLogger().setLevel(logging.NOTSET) 5 | 6 | class Logger(object): 7 | def __init__(self, **kwargs): 8 | name = kwargs.get("name", "Agently-Daily-News-Collector") 9 | log_level = kwargs.get("log_level", "ERROR") 10 | console_level = kwargs.get("console_level", "INFO") 11 | log_format = kwargs.get("format", "%(asctime)s\t[%(levelname)s]\t%(message)s") 12 | log_path = kwargs.get("path", "./logs/Agently_daily_news_collector.log") 13 | handlers = 
kwargs.get("handlers", []) 14 | self.logger = logging.getLogger(name) 15 | if self.logger.hasHandlers(): 16 | self.logger.handlers.clear() 17 | stream_handler = logging.StreamHandler() 18 | stream_handler.setLevel(getattr(logging, console_level)) 19 | stream_handler.setFormatter(logging.Formatter(log_format)) 20 | self.logger.addHandler(stream_handler) 21 | file_handler = logging.FileHandler(log_path) 22 | file_handler.setLevel(getattr(logging, log_level)) 23 | file_handler.setFormatter(logging.Formatter(log_format)) 24 | self.logger.addHandler(file_handler) 25 | for handler in handlers: 26 | self.logger.addHandler(handler) 27 | 28 | def __transform(self, *args, **kwargs): 29 | message = "" 30 | for arg in args: 31 | message += f"{ arg }\t" 32 | message = message[:-1] 33 | kwargs_to_list = [] 34 | kwargs_message = "" 35 | for key, value in kwargs.items(): 36 | kwargs_to_list.append(f"{ key }: { str(value) }") 37 | kwargs_message += "\t".join(kwargs_to_list) 38 | if kwargs_message != "": 39 | message += f"\t{ kwargs_message }" 40 | return message 41 | 42 | def debug(self, *args, **kwargs): 43 | return self.logger.debug(self.__transform(*args, **kwargs)) 44 | 45 | def info(self, *args, **kwargs): 46 | return self.logger.info(self.__transform(*args, **kwargs)) 47 | 48 | def warning(self, *args, **kwargs): 49 | return self.logger.warning(self.__transform(*args, **kwargs)) 50 | 51 | def error(self, *args, **kwargs): 52 | return self.logger.error(self.__transform(*args, **kwargs)) 53 | 54 | def critical(self, *args, **kwargs): 55 | return self.logger.critical(self.__transform(*args, **kwargs)) 56 | 57 | logger = Logger() -------------------------------------------------------------------------------- /utils/path.py: -------------------------------------------------------------------------------- 1 | import os 2 | root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -------------------------------------------------------------------------------- 
/utils/yaml_reader.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from types import SimpleNamespace 3 | 4 | class YAMLResult(SimpleNamespace): 5 | pass 6 | 7 | def read(yaml_path:str): 8 | try: 9 | with open(yaml_path, "r") as yaml_file: 10 | yaml_dict = yaml.safe_load(yaml_file) 11 | return YAMLResult(**yaml_dict) 12 | except Exception as e: 13 | raise Exception(f"[YAML Reader] Error occured when read YAML from path '{ yaml_path }'.\nError: { str(e) }") -------------------------------------------------------------------------------- /workflows/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentEra/Agently-Daily-News-Collector/bec8042823ccd6fead496efad480ec4bb56d8b98/workflows/__init__.py -------------------------------------------------------------------------------- /workflows/column_workflow.py: -------------------------------------------------------------------------------- 1 | import time 2 | import Agently 3 | from .tools.search import search 4 | from .tools.browse import browse 5 | 6 | def start(column_outline, *, agent_factory, SETTINGS, root_path, logger): 7 | tool_proxy = ( 8 | SETTINGS.TOOL_PROXY 9 | if hasattr(SETTINGS, "TOOL_PROXY") 10 | else 11 | ( 12 | SETTINGS.PROXY 13 | if hasattr(SETTINGS, "PROXY") 14 | else None 15 | ) 16 | ) 17 | logger.info("[Start Generate Column]", column_outline["column_title"]) 18 | column_workflow = Agently.Workflow() 19 | column_editor_agent = agent_factory.create_agent() 20 | # You can set column editor agent here, read https://github.com/Maplemx/Agently/tree/main/docs/guidebook to explore 21 | """ 22 | ( 23 | column_editor_agent 24 | .set_role("...") 25 | .set_user_info("...") 26 | ) 27 | """ 28 | 29 | # Define Workflow Chunks 30 | @column_workflow.chunk("start", type="Start") 31 | 32 | @column_workflow.chunk("search") 33 | def search_executor(inputs, storage): 34 | storage.set( 35 
| "searched_news", 36 | search( 37 | column_outline["search_keywords"], 38 | timelimit=SETTINGS.NEWS_TIME_LIMIT if hasattr(SETTINGS, "NEWS_TIME_LIMIT") else "d", 39 | proxy=tool_proxy, 40 | logger=logger, 41 | ) 42 | ) 43 | 44 | @column_workflow.chunk("pick_news") 45 | def pick_news_executor(inputs, storage): 46 | searched_news = storage.get("searched_news", []) 47 | logger.info("[Search News Count]", len(searched_news)) 48 | if len(searched_news) > 0: 49 | pick_results = ( 50 | column_editor_agent 51 | .load_yaml_prompt( 52 | path=f"{ root_path }/prompts/pick_news.yaml", 53 | variables={ 54 | "column_news": searched_news, 55 | "column_requirement": column_outline["column_requirement"], 56 | } 57 | ) 58 | .start() 59 | ) 60 | # sleep to avoid requesting too often 61 | time.sleep(SETTINGS.SLEEP_TIME) 62 | picked_news = [] 63 | for pick_result in pick_results: 64 | if pick_result["can_use"]: 65 | news = searched_news[int(pick_result["id"])].copy() 66 | news.update({ "recommend_comment": pick_result["recommend_comment"] }) 67 | picked_news.append(news) 68 | storage.set("picked_news", picked_news) 69 | logger.info("[Picked News Count]", len(picked_news)) 70 | else: 71 | storage.set("picked_news", []) 72 | logger.info("[Picked News Count]", 0) 73 | 74 | @column_workflow.chunk("read_and_summarize") 75 | def read_and_summarize_executor(inputs, storage): 76 | picked_news = storage.get("picked_news", []) 77 | readed_news = [] 78 | if picked_news and len(picked_news) > 0: 79 | for news in picked_news: 80 | logger.info("[Summarzing]", news["title"]) 81 | news_content = browse( 82 | news["url"], 83 | proxy=tool_proxy, 84 | logger=logger, 85 | ) 86 | if news_content and news_content != "": 87 | try: 88 | summary_result = ( 89 | column_editor_agent 90 | .load_yaml_prompt( 91 | path=f"{ root_path }/prompts/summarize.yaml", 92 | variables={ 93 | "news_content": news_content, 94 | "column_requirement": column_outline["column_requirement"], 95 | "news_title": news["title"], 96 | 
"language": SETTINGS.OUTPUT_LANGUAGE, 97 | } 98 | ) 99 | .start() 100 | ) 101 | if summary_result["can_summarize"]: 102 | readed_news_info = news.copy() 103 | readed_news_info.update({ 104 | "title": summary_result["translated_title"], 105 | "summary": summary_result["summary"] 106 | }) 107 | readed_news.append(readed_news_info) 108 | logger.info("[Summarzing]", "Success") 109 | else: 110 | logger.info("[Summarzing]", "Failed") 111 | # sleep to avoid requesting too often 112 | time.sleep(SETTINGS.SLEEP_TIME) 113 | except Exception as e: 114 | logger.error(f"[Summarzie]: Can not summarize '{ news['title'] }'.\tError: { str(e) }") 115 | storage.set("readed_news", readed_news) 116 | 117 | @column_workflow.chunk("write_column") 118 | def write_column_executor(inputs, storage): 119 | readed_news = storage.get("readed_news", []) 120 | if readed_news and len(readed_news) > 0: 121 | slimmed_news = [] 122 | for index, news in enumerate(readed_news): 123 | slimmed_news.append({ 124 | "id": index, 125 | "title": news["title"], 126 | "summary": news["summary"], 127 | "url": news["url"], 128 | }) 129 | column_result = ( 130 | column_editor_agent 131 | .load_yaml_prompt( 132 | path=f"{ root_path }/prompts/write_column.yaml", 133 | variables={ 134 | "slimmed_news": slimmed_news, 135 | "column_requirement": column_outline["column_requirement"], 136 | "language": SETTINGS.OUTPUT_LANGUAGE, 137 | } 138 | ) 139 | .start() 140 | ) 141 | # sleep to avoid requesting too often 142 | time.sleep(SETTINGS.SLEEP_TIME) 143 | final_news_list = [] 144 | for news in column_result["news_list"]: 145 | id = news["id"] 146 | final_news_list.append({ 147 | "url": readed_news[id]["url"], 148 | "title": readed_news[id]["title"], 149 | "summary": readed_news[id]["summary"], 150 | "recommend_comment": news["recommend_comment"], 151 | }) 152 | storage.set("final_result", { 153 | "title": column_outline["column_title"], 154 | "prologue": column_result["prologue"], 155 | "news_list": final_news_list, 156 | 
}) 157 | else: 158 | storage.set("final_result", None) 159 | 160 | # Connect Chunks 161 | ( 162 | column_workflow.chunks["start"] 163 | .connect_to(column_workflow.chunks["search"]) 164 | .connect_to(column_workflow.chunks["pick_news"]) 165 | .connect_to(column_workflow.chunks["read_and_summarize"]) 166 | .connect_to(column_workflow.chunks["write_column"]) 167 | ) 168 | 169 | # Start Workflow 170 | column_workflow.start() 171 | 172 | return column_workflow.executor.store.get("final_result") 173 | -------------------------------------------------------------------------------- /workflows/main_workflow.py: -------------------------------------------------------------------------------- 1 | import time 2 | import Agently 3 | from datetime import datetime 4 | from .column_workflow import start as start_column_workflow 5 | 6 | def start(*, agent_factory, SETTINGS, root_path, logger): 7 | main_workflow = Agently.Workflow() 8 | chief_editor_agent = agent_factory.create_agent() 9 | # You can set chief editor agent here, read https://github.com/Maplemx/Agently/tree/main/docs/guidebook to explore 10 | """ 11 | ( 12 | chief_editor_agent 13 | .set_role("...") 14 | .set_user_info("...") 15 | ) 16 | """ 17 | 18 | # Define Workflow Chunks 19 | @main_workflow.chunk("start", type="Start") 20 | 21 | @main_workflow.chunk("input_topic") 22 | def input_topic_executor(inputs, storage): 23 | if not SETTINGS.USE_CUSTOMIZE_OUTLINE: 24 | storage.set( 25 | "topic", 26 | input("[Please input the topic of your news collection]: ") 27 | ) 28 | 29 | @main_workflow.chunk("generate_outline") 30 | def generate_outline_executor(inputs, storage): 31 | if SETTINGS.USE_CUSTOMIZE_OUTLINE: 32 | storage.set("outline", SETTINGS.CUSTOMIZE_OUTLINE) 33 | logger.info("[Use Customize Outline]", SETTINGS.CUSTOMIZE_OUTLINE) 34 | else: 35 | # Load prompt from /prompts/create_outline.yaml 36 | outline = ( 37 | chief_editor_agent 38 | .load_yaml_prompt( 39 | path=f"{ root_path }/prompts/create_outline.yaml", 40 | 
variables={ 41 | "topic": storage.get("topic"), 42 | "news_time_limit": SETTINGS.NEWS_TIME_LIMIT if hasattr(SETTINGS, "NEWS_TIME_LIMIT") else "d", 43 | "language": SETTINGS.OUTPUT_LANGUAGE, 44 | "max_column_num": SETTINGS.MAX_COLUMN_NUM, 45 | } 46 | ) 47 | .start() 48 | ) 49 | storage.set("outline", outline) 50 | logger.info("[Outline Generated]", outline) 51 | # sleep to avoid requesting too often 52 | time.sleep(SETTINGS.SLEEP_TIME) 53 | 54 | @main_workflow.chunk("generate_columns") 55 | def generate_columns_executor(inputs, storage): 56 | columns_data = [] 57 | outline = storage.get("outline") 58 | for column_outline in outline["column_list"]: 59 | column_data = start_column_workflow( 60 | column_outline=column_outline, 61 | agent_factory=agent_factory, 62 | SETTINGS=SETTINGS, 63 | root_path=root_path, 64 | logger=logger, 65 | ) 66 | if column_data: 67 | columns_data.append(column_data) 68 | logger.info("[Column Data Prepared]", column_data) 69 | storage.set("columns_data", columns_data) 70 | 71 | @main_workflow.chunk("generate_markdown") 72 | def generate_markdown_executor(inputs, storage): 73 | outline = storage.get("outline") 74 | columns_data = storage.get("columns_data") 75 | if columns_data and len(columns_data) > 0: 76 | # Main Title 77 | md_doc_text = f'# { outline["report_title"] }\n\n' 78 | md_doc_text += f'> { datetime.now().strftime("%Y-%m-%d %A") }\n\n' 79 | # Columns 80 | if SETTINGS.IS_DEBUG: 81 | logger.debug("[Columns Data]", columns_data) 82 | for column_data in columns_data: 83 | md_doc_text += f'## { column_data["title"] }\n\n### PROLOGUE\n\n' 84 | md_doc_text += f'> { column_data["prologue"] }\n\n' 85 | md_doc_text += f"### NEWS LIST\n\n" 86 | for single_news in column_data["news_list"]: 87 | md_doc_text += f'- [{ single_news["title"] }]({ single_news["url"] })\n\n' 88 | md_doc_text += f' - `[summray]` { single_news["summary"] }\n' 89 | md_doc_text += f' - `[comment]` { single_news["recommend_comment"] }\n\n' 90 | # Tailer 91 | md_doc_text 
+="\n\n---\n\nPowered by [Agently AI Application Development Framework & Agently Workflow](https://github.com/Maplemx/Agently)\n\n" 92 | md_doc_text += f"Model Information:{ SETTINGS.MODEL_PROVIDER if hasattr(SETTINGS, 'MODEL_PROVIDER') else 'OpenAI' } - { str(SETTINGS.MODEL_OPTIONS) if hasattr(SETTINGS, 'MODEL_OPTIONS') else 'Default Options' }\n\n" 93 | md_doc_text += '**_Agently_** [Guidebook](https://github.com/Maplemx/Agently/blob/main/docs/guidebook)\n\n[Apply Developers WeChat Group](https://doc.weixin.qq.com/forms/AIoA8gcHAFMAScAhgZQABIlW6tV3l7QQf) or Scan QR Code to Apply.\n\nimage' 94 | logger.info("[Markdown Generated]", md_doc_text) 95 | with open(f'{ root_path }/{ outline["report_title"] }_{ datetime.now().strftime("%Y-%m-%d") }.md', 'w', encoding='utf-8') as f: 96 | f.write(md_doc_text) 97 | else: 98 | logger.info("[Markdown Generation Failed] Due to have not any column data.") 99 | 100 | # Connect Chunks 101 | ( 102 | main_workflow.chunks["start"] 103 | .connect_to(main_workflow.chunks["input_topic"]) 104 | .connect_to(main_workflow.chunks["generate_outline"]) 105 | .connect_to(main_workflow.chunks["generate_columns"]) 106 | .connect_to(main_workflow.chunks["generate_markdown"]) 107 | ) 108 | 109 | # Start Workflow 110 | main_workflow.start() -------------------------------------------------------------------------------- /workflows/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentEra/Agently-Daily-News-Collector/bec8042823ccd6fead496efad480ec4bb56d8b98/workflows/tools/__init__.py -------------------------------------------------------------------------------- /workflows/tools/browse.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | from bs4 import BeautifulSoup 4 | 5 | def browse(url, *, logger=None, proxy=None): 6 | content = "" 7 | try: 8 | request_options = { 9 | "headers": { 
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" } 10 | } 11 | if proxy: 12 | if proxy.startswith("http:"): 13 | request_options.update({ "proxies": { "http": proxy } }) 14 | elif proxy.startswith("https:"): 15 | request_options.update({ "proxies": { "https": proxy } }) 16 | page = requests.get( 17 | url, 18 | **request_options 19 | ) 20 | soup = BeautifulSoup(page.content, "html.parser") 21 | # find text in p, list, pre (github code), td 22 | chunks = soup.find_all(["h1", "h2", "h3", "h4", "h5", "p", "pre", "td"]) 23 | for chunk in chunks: 24 | if chunk.name.startswith("h"): 25 | content += "#" * int(chunk.name[-1]) + " " + chunk.get_text() + "\n" 26 | else: 27 | text = chunk.get_text() 28 | if text and text != "": 29 | content += text + "\n" 30 | # find text in div that class=content 31 | divs = soup.find("div", class_="content") 32 | if divs: 33 | chunks_with_text = divs.find_all(text=True) 34 | for chunk in chunks_with_text: 35 | if isinstance(chunk, str) and chunk.strip(): 36 | content += chunk.strip() + "\n" 37 | content = re.sub(r"\n+", "\n", content) 38 | return content 39 | except Exception as e: 40 | if logger: 41 | logger.error(f"[Browse]: Can not browse '{ url }'.\tError: { str(e) }") 42 | return "" -------------------------------------------------------------------------------- /workflows/tools/search.py: -------------------------------------------------------------------------------- 1 | from duckduckgo_search import DDGS 2 | 3 | def search(keywords, **kwargs): 4 | results = [] 5 | try: 6 | with DDGS(proxy=kwargs.get("proxy", None)) as ddgs: 7 | for index, result in enumerate( 8 | ddgs.news( 9 | keywords, 10 | max_results=kwargs.get("max_results", 8), 11 | timelimit=kwargs.get("timelimit", "d"), 12 | ) 13 | ): 14 | results.append({ 15 | "id": index, 16 | "title": result["title"], 17 | "brief": result["body"], 18 | "url": result["url"], 19 | "source": result["source"], 
20 | "date": result["date"], 21 | }) 22 | return results 23 | except Exception as e: 24 | if "logger" in kwargs: 25 | kwargs["logger"].error(f"[Search]: Can not search '{ keywords }'.\tError: { str(e) }") 26 | return [] 27 | --------------------------------------------------------------------------------