├── docs
├── cv-arxiv-daily-wechat.json
├── _config.yml
└── README.md
├── requirements.txt
├── assets
├── 4-ga-7.png
├── 4-ga-8.png
├── 4-ga-9.png
├── 4-ga-2-1.png
├── 4-ga-3-1.png
├── 4-ga-5-1.png
└── 5-pages-1.png
├── .github
├── ISSUE_TEMPLATE
│ ├── config.yml
│ ├── feature_request.md
│ ├── question.md
│ └── bug_report.md
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ ├── jekyll-gh-pages.yml
│ ├── llm-inference-daily.yml
│ └── update_paper_links.yml
├── .gitignore
├── config.yaml
├── CODE_OF_CONDUCT.md
├── LICENSE
└── daily_arxiv.py
/docs/cv-arxiv-daily-wechat.json:
--------------------------------------------------------------------------------
1 | {
2 |
3 | }
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | arxiv
3 | pyyaml
--------------------------------------------------------------------------------
/assets/4-ga-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Toseic/LLM-inference-arxiv-daily/HEAD/assets/4-ga-7.png
--------------------------------------------------------------------------------
/assets/4-ga-8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Toseic/LLM-inference-arxiv-daily/HEAD/assets/4-ga-8.png
--------------------------------------------------------------------------------
/assets/4-ga-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Toseic/LLM-inference-arxiv-daily/HEAD/assets/4-ga-9.png
--------------------------------------------------------------------------------
/assets/4-ga-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Toseic/LLM-inference-arxiv-daily/HEAD/assets/4-ga-2-1.png
--------------------------------------------------------------------------------
/assets/4-ga-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Toseic/LLM-inference-arxiv-daily/HEAD/assets/4-ga-3-1.png
--------------------------------------------------------------------------------
/assets/4-ga-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Toseic/LLM-inference-arxiv-daily/HEAD/assets/4-ga-5-1.png
--------------------------------------------------------------------------------
/assets/5-pages-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Toseic/LLM-inference-arxiv-daily/HEAD/assets/5-pages-1.png
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | # Configuration: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository
2 |
3 | blank_issues_enabled: false
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | CMakeLists.txt.user
2 | CMakeLists_modified.txt
3 |
4 |
5 | build/
6 |
7 | lib/
8 | bin/
9 |
10 | cmake_modules/
11 | cmake-build-debug/
12 | .idea/
13 | .vscode/
14 | *.pyc
15 |
16 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## Description
2 |
3 |
4 |
5 | ## Related Issue
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: 🚀 Feature request
3 | about: Suggest an idea for this project 🏖
4 | title: ""
5 | labels: enhancement
6 | assignees:
7 | ---
8 |
9 | ## 🚀 Feature Request
10 |
11 |
12 |
13 | ## 📎 Additional context
14 |
15 |
16 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: ❓ Question
3 | about: Ask a question about this project 🎓
4 | title: ""
5 | labels: question
6 | assignees:
7 | ---
8 |
9 | ## Checklist
10 |
11 |
12 |
13 | - [ ] I've searched the project's [`issues`]
14 |
15 | ## ❓ Question
16 |
17 |
18 |
19 | How can I [...]?
20 |
21 | Is it possible to [...]?
22 |
23 | ## 📎 Additional context
24 |
25 |
26 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: 🐛 Bug report
3 | about: If something isn't working 🔧
4 | title: ""
5 | labels: bug
6 | assignees:
7 | ---
8 |
9 | ## 🐛 Bug Report
10 |
11 |
12 |
13 | ## 🔬 How To Reproduce
14 |
15 | Steps to reproduce the behavior:
16 |
17 | 1. ...
18 |
19 | ### Environment
20 |
21 | - OS: [e.g. Linux / Windows / macOS]
22 | - Python version, get it with:
23 |
24 | ```bash
25 | python --version
26 | ```
27 |
28 | ## 📎 Additional context
29 |
30 |
31 |
--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | title: LLM inference Arxiv Daily
2 | description: Automatically Update inference Papers Daily using Github Actions (Update Every 12 Hours)
3 | show_downloads: true
4 |
5 | github:
6 | zip_url: https://github.com/Toseic/LLM-inference-arxiv-daily
7 | another_url: https://github.com/Toseic/LLM-inference-arxiv-daily
8 |
9 | ## add remote theme
10 | remote_theme: jekyll/minima@v2.5.1
11 |
12 | # minima:
13 | skin: dark
14 | social_links:
15 | twitter: AlphaRealcat
16 | github: vincentqyw
17 |
18 | plugins:
19 | - jekyll-remote-theme # add this line to the plugins list if you already have one
20 | - jekyll-feed
21 | - jekyll-seo-tag
22 |
--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
1 | # TODO: add papers by configuration file
2 | base_url: "https://arxiv.paperswithcode.com/api/v0/papers/"
3 | user_name: "Vincentqyw"
4 | repo_name: "cv-arxiv-daily"
5 | show_authors: True
6 | show_links: True
7 | show_badge: True
8 | max_results: 10
9 |
10 | publish_readme: True
11 | publish_gitpage: True
12 | publish_wechat: False
13 |
14 | # file paths
15 | json_readme_path: './docs/cv-arxiv-daily.json'
16 | json_gitpage_path: './docs/cv-arxiv-daily-web.json'
17 | json_wechat_path: './docs/cv-arxiv-daily-wechat.json'
18 |
19 | md_readme_path: 'README.md'
20 | md_gitpage_path: './docs/index.md'
21 | md_wechat_path: './docs/wechat.md'
22 |
23 | # keywords to search
24 | keywords:
25 | "inference":
26 | filters: ["LLM inference", "LLM inference system"]
27 | "MoE":
28 | filters: ["MoE", "Mixture of experts"]
29 |
30 |
--------------------------------------------------------------------------------
/.github/workflows/jekyll-gh-pages.yml:
--------------------------------------------------------------------------------
1 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages
2 | name: Deploy Jekyll with GitHub Pages dependencies preinstalled
3 |
4 | on:
5 | # Runs on pushes targeting the default branch
6 | push:
7 | branches: ["main"]
8 | schedule:
9 | - cron: "30 0/12 * * *" #'*/60 * * * *'
10 | # Allows you to run this workflow manually from the Actions tab
11 | workflow_dispatch:
12 |
13 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
14 | permissions:
15 | contents: read
16 | pages: write
17 | id-token: write
18 |
19 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
20 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
21 | concurrency:
22 | group: "pages"
23 | cancel-in-progress: false
24 |
25 | jobs:
26 | # Build job
27 | build:
28 | runs-on: ubuntu-latest
29 | steps:
30 | - name: Checkout
31 | uses: actions/checkout@v4
32 | - name: Setup Pages
33 | uses: actions/configure-pages@v5
34 | - name: Build with Jekyll
35 | uses: actions/jekyll-build-pages@v1
36 | with:
37 | source: ./docs/
38 | destination: ./_site
39 | - name: Upload artifact
40 | uses: actions/upload-pages-artifact@v3
41 |
42 | # Deployment job
43 | deploy:
44 | environment:
45 | name: github-pages
46 | url: ${{ steps.deployment.outputs.page_url }}
47 | runs-on: ubuntu-latest
48 | needs: build
49 | steps:
50 | - name: Deploy to GitHub Pages
51 | id: deployment
52 | uses: actions/deploy-pages@v4
53 |
--------------------------------------------------------------------------------
/.github/workflows/llm-inference-daily.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow to help you get started with Actions
2 |
3 | name: Run LLM inference Papers Daily
4 |
5 | # Controls when the workflow will run
6 | on:
7 | # Allows you to run this workflow manually from the Actions tab
8 | workflow_dispatch:
9 | schedule:
10 | - cron: "0 0/12 * * *" #'*/60 * * * *'
11 | # Triggers the workflow on push or pull request events but only for the main branch
12 | push:
13 | branches:
14 | - main
15 |
16 | env:
17 |
18 | GITHUB_USER_NAME: Toseic
19 | GITHUB_USER_EMAIL: Toseic@users.noreply.github.com
20 |
21 |
22 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
23 | jobs:
24 | # This workflow contains a single job called "build"
25 | build:
26 | name: update
27 | # The type of runner that the job will run on
28 | runs-on: ubuntu-latest
29 |
30 | # Steps represent a sequence of tasks that will be executed as part of the job
31 | steps:
32 | - name: Checkout
33 | uses: actions/checkout@v3
34 |
35 | - name: Set up Python Env
36 | uses: actions/setup-python@v4
37 | with:
38 | python-version: '3.10'
39 | #architecture: 'x64' # optional x64 or x86. Defaults to x64 if not specified
40 | - name: Install dependencies
41 | run: |
42 | python -m pip install --upgrade pip
43 | pip install arxiv
44 | pip install requests
45 | pip install pyyaml
46 |
47 | - name: Run daily arxiv
48 | run: |
49 | python daily_arxiv.py
50 |
51 | - name: Push new cv-arxiv-daily.md
52 | uses: github-actions-x/commit@v2.9
53 | with:
54 | github-token: ${{ secrets.GITHUB_TOKEN }}
55 | commit-message: "Github Action Automatic Update LLM inference Arxiv Papers"
56 | files: README.md docs/cv-arxiv-daily.json docs/cv-arxiv-daily-web.json docs/index.md docs/cv-arxiv-daily-wechat.json docs/wechat.md
57 | rebase: 'true'
58 | name: ${{ env.GITHUB_USER_NAME }}
59 | email: ${{ env.GITHUB_USER_EMAIL }}
60 |
--------------------------------------------------------------------------------
/.github/workflows/update_paper_links.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow to help you get started with Actions
2 |
3 | name: Run Update Paper Links Weekly
4 |
5 | # Controls when the workflow will run
6 | on:
7 | # Allows you to run this workflow manually from the Actions tab
8 | workflow_dispatch:
9 | schedule:
10 | - cron: "0 8 * * 1" #Run At 08:00 on Monday
11 | # Triggers the workflow on push or pull request events but only for the main branch
12 | push:
13 | branches:
14 | - main
15 |
16 | env:
17 |
18 | GITHUB_USER_NAME: Toseic
19 | GITHUB_USER_EMAIL: Toseic@users.noreply.github.com
20 |
21 |
22 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
23 | jobs:
24 | # This workflow contains a single job called "build"
25 | build:
26 | name: update
27 | # The type of runner that the job will run on
28 | runs-on: ubuntu-latest
29 |
30 | # Steps represent a sequence of tasks that will be executed as part of the job
31 | steps:
32 | - name: Checkout
33 | uses: actions/checkout@v3
34 |
35 | - name: Set up Python Env
36 | uses: actions/setup-python@v4
37 | with:
38 | python-version: '3.10'
39 | #architecture: 'x64' # optional x64 or x86. Defaults to x64 if not specified
40 | - name: Install dependencies
41 | run: |
42 | python -m pip install --upgrade pip
43 | pip install arxiv
44 | pip install requests
45 | pip install pyyaml
46 |
47 | - name: Run daily arxiv
48 | run: |
49 | python daily_arxiv.py --update_paper_links
50 |
51 | - name: Push new cv-arxiv-daily.md
52 | uses: github-actions-x/commit@v2.9
53 | with:
54 | github-token: ${{ secrets.GITHUB_TOKEN }}
55 | commit-message: "Github Action Automatic Update CV Arxiv Papers"
56 | files: README.md docs/cv-arxiv-daily.json docs/cv-arxiv-daily-web.json docs/index.md docs/cv-arxiv-daily-wechat.json docs/wechat.md
57 | rebase: 'true'
58 | name: ${{ env.GITHUB_USER_NAME }}
59 | email: ${{ env.GITHUB_USER_EMAIL }}
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 |
2 |
CV-ARXIV-DAILY
Automatically Update CV Papers Daily using Github Actions
3 |
4 |
5 |
6 |
7 |
8 | ##
9 |
10 | This repository hosts the source code for cv-arxiv-daily, a useful script that fetches arXiv papers daily.
11 |
12 | ## Overview
13 |
14 | This codebase is composed of the following parts:
15 |
16 | - `daily_arxiv.py`: main script that processes the given configuration
17 | - `config.yaml`: configuration file of papers' keywords etc.
18 |
19 | ## Usage
20 |
21 |
22 | Table of Contents
23 |
24 | 1. Fork this [repo](https://github.com/Vincentqyw/cv-arxiv-daily)
25 | 2. Edit configs:
26 | - Change `GITHUB_USER_NAME` and `GITHUB_USER_EMAIL` in [llm-inference-daily.yml](../.github/workflows/llm-inference-daily.yml) and [update_paper_links.yml](../.github/workflows/update_paper_links.yml)
27 | - Change `user_name` in [config.yaml](../config.yaml)
28 | - Push changes to remote repo
29 | 3. Config Github Actions
30 | - Enable read and write permissions: Setting -> Actions -> Workflow permissions, select `Read and write permissions` and save.
31 | 
32 | - Enable workflows: Actions -> `I understand my workflows, go ahead and enable them` -> Select `Run LLM inference Papers Daily` in the right sidebar and click `Enable workflow` -> click `Run workflow` and wait about 1 minute until the job finishes. Do the same for the job `Run Update Paper Links Weekly`.
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 4. Setting Gitpages (optional)
39 | - Setting -> Pages -> Build and deployment. Source: `Deploy from a branch`; for Branch, select `main` and the `/docs` folder, then save.
40 | 
41 | - Now you can open your GitHub Pages site: https://your_github_username.github.io/cv-arxiv-daily
42 | 5. Add new keywords (optional)
43 | - Edit `keywords` in [config.yaml](../config.yaml), you can add more filters or keywords.
44 | - Push changes to remote repo and re-run Github Actions Manually.
45 |
46 |
47 |
48 | ## Release plan
49 |
50 | We are still in the process of fully releasing. Here is the release plan:
51 |
52 | - [x] Configuration file
53 | - [x] Update code link
54 | - [ ] Subscribe & Update alerting
55 | - [ ] Support more `arxiv` filters
56 | - [ ] Archive old papers
57 | - [ ] Language translation ([`ChatGPT`](https://chat.openai.com/chat))
58 | - [ ] Useful comments
59 | - [ ] ...
60 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | alpharealcat@gmail.com.
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series
86 | of actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or
93 | permanent ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 |
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 |
124 | [homepage]: https://www.contributor-covenant.org
125 |
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/daily_arxiv.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import json
4 | import arxiv
5 | import yaml
6 | import logging
7 | import argparse
8 | import datetime
9 | import requests
10 |
# Configure the root logger once at import time: timestamped, level-tagged
# messages at INFO and above.
logging.basicConfig(
    format="[%(asctime)s %(levelname)s] %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)

# Papers-with-Code endpoint; a paper id is appended to it and the JSON answer
# is inspected for an "official" repository entry.
base_url = "https://arxiv.paperswithcode.com/api/v0/papers/"
# GitHub repository search endpoint queried by get_code_link().
github_url = "https://api.github.com/search/repositories"
# Prefix used to build the "abs/<id>" paper links written to the tables.
arxiv_url = "http://arxiv.org/"
20 |
21 |
def load_config(config_file: str) -> dict:
    """
    Load the YAML configuration and derive one search query per topic.

    @param config_file: path of the YAML configuration file
    @return: the parsed configuration dict with an extra "kv" entry mapping
             every topic under "keywords" to its query string
    """

    def pretty_filters(**config) -> dict:
        """Build {topic: query} from config["keywords"][topic]["filters"]."""
        QUOTE = '"'
        # The operator needs surrounding spaces: without them single-word
        # filters are fused into one token ("SLAM" + "OR" + "VO" -> "SLAMORVO").
        OR = " OR "

        def parse_filters(filters: list) -> str:
            # Multi-word filters are quoted so they are searched as a phrase;
            # single words are passed through unchanged.
            terms = [
                QUOTE + term + QUOTE if len(term.split()) > 1 else term
                for term in filters
            ]
            return OR.join(terms)

        return {
            topic: parse_filters(spec["filters"])
            for topic, spec in config["keywords"].items()
        }

    with open(config_file, "r") as f:
        # NOTE(review): FullLoader is kept for compatibility; if the config can
        # ever come from an untrusted source, switch to yaml.safe_load.
        config = yaml.load(f, Loader=yaml.FullLoader)
    config["kv"] = pretty_filters(**config)
    logging.info(f"config = {config}")
    return config
56 |
57 |
def get_authors(authors, first_author=False):
    """
    Render an author list as text.

    @param authors: sequence of author objects (anything str() accepts)
    @param first_author: when true, return only the first author
    @return: str; all authors joined by ", ", or just the first one.
             An empty sequence yields "".
    """
    if not authors:
        # Nothing to index or join; avoids IndexError on authors[0].
        return ""
    if first_author:
        # Convert explicitly so both branches return str, not an author object.
        return str(authors[0])
    return ", ".join(str(author) for author in authors)
65 |
66 |
def sort_papers(papers):
    """
    Return a new dict with the entries of `papers` in descending key order.

    @param papers: dict keyed by paper id
    @return: dict with the same items, largest key first
    """
    return {paper_id: papers[paper_id] for paper_id in sorted(papers, reverse=True)}
74 |
75 |
76 | import requests
77 |
78 |
def get_code_link(qword: str) -> str:
    """
    Search GitHub repositories for `qword` and return the most-starred hit.

    @param qword: query string, eg. arxiv ids and paper titles
    @return paper_code in github: string, if not found, return None
    """
    params = {"q": f"{qword}", "sort": "stars", "order": "desc"}
    # A timeout keeps one stalled connection from hanging the whole run.
    r = requests.get(github_url, params=params, timeout=30)
    results = r.json()
    # Error answers (e.g. when the search is rejected) carry no "total_count"
    # or "items"; treat them as "not found" instead of raising KeyError.
    if not isinstance(results, dict):
        return None
    items = results.get("items") or []
    if (results.get("total_count") or 0) > 0 and items:
        return items[0].get("html_url")
    return None
95 |
96 |
def get_daily_papers(topic, query="slam", max_results=2):
    """
    Query arXiv and format every hit as a markdown table row and a list item.

    @param topic: str, key under which the results are returned
    @param query: str, arXiv search query
    @param max_results: int, maximum number of results requested
    @return (data, data_web): two dicts {topic: {paper_key: text}}; `data`
            holds table rows, `data_web` holds "- ..." list items
    """
    content = dict()
    content_to_web = dict()
    # NOTE(review): Search.results() is reported as deprecated in newer
    # releases of the arxiv package in favour of Client().results(search);
    # confirm the installed version before migrating.
    search_engine = arxiv.Search(
        query=query, max_results=max_results, sort_by=arxiv.SortCriterion.SubmittedDate
    )

    for result in search_engine.results():
        paper_id = result.get_short_id()
        # eg: 2108.09112v1 -> 2108.09112
        # Only a trailing version suffix is removed, so ids whose archive name
        # contains a "v" (e.g. "solv-int/9901001v2") are not cut short.
        paper_key = re.sub(r"v\d+$", "", paper_id)

        try:
            paper_title = result.title
            paper_first_author = get_authors(result.authors, first_author=True)
            update_time = result.updated.date()
            paper_url = arxiv_url + "abs/" + paper_key

            logging.info(
                f"Time = {update_time} title = {paper_title} author = {paper_first_author}"
            )

            # FIXME: paperswithcode api is down? The repository lookup is
            # disabled, so every paper is currently written with a "null" code cell.
            repo_url = None

            code_cell = "null" if repo_url is None else "**[link]({})**".format(repo_url)
            content[paper_key] = "|**{}**|**{}**|{} et.al.|[{}]({})|{}|\n".format(
                update_time,
                paper_title,
                paper_first_author,
                paper_key,
                paper_url,
                code_cell,
            )

            web_item = "- {}, **{}**, {} et.al., Paper: [{}]({})".format(
                update_time,
                paper_title,
                paper_first_author,
                paper_url,
                paper_url,
            )
            if repo_url is not None:
                web_item += ", Code: **[{}]({})**".format(repo_url, repo_url)
            # TODO: select useful comments (result.comment) and append them here
            content_to_web[paper_key] = web_item + "\n"

        except Exception as e:
            # Best effort: one malformed entry must not abort the whole topic.
            logging.error(f"exception: {e} with id: {paper_key}")

    data = {topic: content}
    data_web = {topic: content_to_web}
    return data, data_web
199 |
200 |
def update_paper_links(filename):
    """
    weekly update paper links in json file

    Every stored table row is normalised (version suffix removed from the
    arXiv cell) and rows whose code cell is "null" are looked up again.
    Entries that are not table rows are left untouched.

    @param filename: str, JSON file of the form {topic: {paper_id: row}}
    @return None
    """

    def parse_arxiv_string(s):
        """Split a "|date|title|authors|arxiv|code|" row; None if `s` is not one."""
        row = s.strip()
        if not row.startswith("|"):
            # e.g. the "- date, **title**, ..." list items of the wechat file
            return None
        cells = row[1:]
        if cells.endswith("|"):
            cells = cells[:-1]
        cells = cells.split("|")
        if len(cells) < 5:
            return None
        # Read the fixed columns from both ends so that a "|" inside the
        # title cannot shift the remaining fields.
        date = cells[0].strip()
        title = "|".join(cells[1:-3]).strip()
        authors = cells[-3].strip()
        arxiv_id = re.sub(r"v\d+", "", cells[-2].strip())
        code = cells[-1].strip()
        return date, title, authors, arxiv_id, code

    with open(filename, "r") as f:
        content = f.read()
    json_data = json.loads(content) if content else {}

    for keywords, v in json_data.items():
        logging.info(f"keywords = {keywords}")
        for paper_id, contents in v.items():
            parsed = parse_arxiv_string(str(contents))
            if parsed is None:
                logging.info(f"paper_id = {paper_id}: not a table row, left unchanged")
                continue
            update_time, paper_title, paper_first_author, paper_url, code_url = parsed

            contents = "|{}|{}|{}|{}|{}|\n".format(
                update_time, paper_title, paper_first_author, paper_url, code_url
            )
            # Replacing the value of an existing key is safe while iterating.
            json_data[keywords][paper_id] = str(contents)
            logging.info(f"paper_id = {paper_id}, contents = {contents}")

            if "|null|" not in contents:
                # the row already carries a code link
                continue
            try:
                code_url = base_url + paper_id  # TODO
                # A timeout keeps one stalled connection from hanging the run.
                r = requests.get(code_url, timeout=30).json()
                repo_url = None
                if "official" in r and r["official"]:
                    repo_url = r["official"]["url"]
                if repo_url is not None:
                    new_cont = contents.replace("|null|", f"|**[link]({repo_url})**|")
                    logging.info(f"ID = {paper_id}, contents = {new_cont}")
                    json_data[keywords][paper_id] = str(new_cont)

            except Exception as e:
                # Best effort: a failed lookup keeps the "null" cell.
                logging.error(f"exception: {e} with id: {paper_id}")

    # dump to json file
    with open(filename, "w") as f:
        json.dump(json_data, f)
261 |
262 |
def update_json_file(filename, data_dict):
    """
    Merge freshly collected papers into the JSON database on disk.

    @param filename: str, JSON file of the form {topic: {paper_id: text}};
                     an empty file is treated as an empty database
    @param data_dict: iterable of {topic: {paper_id: text}} dicts
    @return None
    """
    with open(filename, "r") as f:
        raw = f.read()
    stored = json.loads(raw) if raw else {}
    merged = stored.copy()

    # fold every collected batch into the matching topic
    for batch in data_dict:
        for topic, papers in batch.items():
            if topic not in merged:
                merged[topic] = papers
            else:
                merged[topic].update(papers)

    with open(filename, "w") as f:
        json.dump(merged, f)
288 |
289 |
def json_to_md(
    filename,
    md_filename,
    task="",
    to_web=False,
    use_title=True,
    use_tc=True,
    show_badge=True,
    use_b2t=True,
    badge_repo="Vincentqyw/cv-arxiv-daily",
):
    """
    Render the paper database stored in a JSON file as a markdown page.

    @param filename: str, JSON file of the form {topic: {paper_id: text}}
    @param md_filename: str, markdown file to create or overwrite
    @param task: str, label used in the final log message
    @param to_web: bool, together with use_title writes the "layout: default"
                   front matter and selects the left-aligned table header
    @param use_title: bool, write the "## Updated on" heading and the table
                      header of each topic; otherwise a "> Updated on" line
    @param use_tc: bool, write a table of contents linking to every topic
    @param show_badge: bool, write the shields.io badges and their link targets
    @param use_b2t: bool, write a "back to top" link after every topic
    @param badge_repo: str, "owner/name" of the GitHub repository the badges
                       point to
    @return None
    """

    def pretty_math(s: str) -> str:
        """Trim the inside of the first $...$ span and pad it with spaces."""
        match = re.search(r"\$.*\$", s)
        if match is None:
            return s
        math_start, math_end = match.span()
        before, after = s[:math_start], s[math_end:]
        # Pad only next to a real neighbour that is neither a space nor "*";
        # a span touching either end of the string has no neighbour to test.
        pad_left = " " if before and before[-1] not in " *" else ""
        pad_right = " " if after and after[0] not in " *" else ""
        return f"{before}{pad_left}${match.group()[1:-1].strip()}${pad_right}{after}"

    DateNow = str(datetime.date.today()).replace("-", ".")

    with open(filename, "r", encoding="utf-8") as f:
        content = f.read()
    data = json.loads(content) if content else {}

    # Mode "w" creates the file or truncates an existing one, so a single
    # open is enough to start from a clean page.
    with open(md_filename, "w", encoding="utf-8") as f:

        if use_title and to_web:
            f.write("---\n" + "layout: default\n" + "---\n\n")

        if show_badge:
            f.write("[![Contributors][contributors-shield]][contributors-url]\n")
            f.write("[![Forks][forks-shield]][forks-url]\n")
            f.write("[![Stargazers][stars-shield]][stars-url]\n")
            f.write("[![Issues][issues-shield]][issues-url]\n\n")

        if use_title:
            f.write("## Updated on " + DateNow + "\n")
        else:
            f.write("> Updated on " + DateNow + "\n")

        # TODO: add usage

        # Add: table of contents
        if use_tc:
            for keyword in data.keys():
                if not data[keyword]:
                    continue
                kw = keyword.replace(" ", "-")
                f.write(f"* [{kw}](#{kw.lower()})\n")

        # Anchor of the "Updated on" heading, e.g. "#updated-on-20240102".
        top_anchor = f"#Updated on {DateNow}".replace(" ", "-").replace(".", "").lower()

        for keyword in data.keys():
            day_content = data[keyword]
            if not day_content:
                continue
            # the head of each part
            f.write(f"## {keyword}\n\n")

            if use_title:
                if not to_web:
                    f.write(
                        "|Publish Date|Title|Authors|PDF|Code|\n"
                        + "|---|---|---|---|---|\n"
                    )
                else:
                    f.write("| Publish Date | Title | Authors | PDF | Code |\n")
                    f.write(
                        "|:---------|:-----------------------|:---------|:------|:------|\n"
                    )

            # newest first: entries are ordered by descending paper id
            day_content = sort_papers(day_content)

            for _, v in day_content.items():
                if v is not None:
                    f.write(pretty_math(v))  # make latex pretty

            f.write("\n")

            # Add: back to top
            if use_b2t:
                f.write(
                    f'<p align="right">(<a href="{top_anchor}">back to top</a>)</p>\n\n'
                )

        if show_badge:
            shields = "https://img.shields.io/github"
            repo_home = f"https://github.com/{badge_repo}"
            style = ".svg?style=for-the-badge"
            f.write(f"[contributors-shield]: {shields}/contributors/{badge_repo}{style}\n")
            f.write(f"[contributors-url]: {repo_home}/graphs/contributors\n")
            f.write(f"[forks-shield]: {shields}/forks/{badge_repo}{style}\n")
            f.write(f"[forks-url]: {repo_home}/network/members\n")
            f.write(f"[stars-shield]: {shields}/stars/{badge_repo}{style}\n")
            f.write(f"[stars-url]: {repo_home}/stargazers\n")
            f.write(f"[issues-shield]: {shields}/issues/{badge_repo}{style}\n")
            f.write(f"[issues-url]: {repo_home}/issues\n\n")

    logging.info(f"{task} finished")
463 |
464 |
def demo(**config):
    """
    Run one update cycle: collect papers (unless only links are refreshed)
    and regenerate every enabled output page.

    @param config: configuration values as keyword arguments; reads "kv",
                   "max_results", "show_badge", "update_paper_links" and the
                   "publish_*" flags, plus the "json_*_path" / "md_*_path"
                   pair of every enabled output
    @return None
    """
    # TODO: use config
    table_batches = []
    web_batches = []

    keywords = config["kv"]
    max_results = config["max_results"]
    publish_readme = config["publish_readme"]
    publish_gitpage = config["publish_gitpage"]
    publish_wechat = config["publish_wechat"]
    show_badge = config["show_badge"]

    refresh_links = config["update_paper_links"]
    logging.info(f"Update Paper Link = {refresh_links}")
    if refresh_links == False:
        logging.info(f"GET daily papers begin")
        for topic, keyword in keywords.items():
            logging.info(f"Keyword: {topic}")
            table_rows, web_rows = get_daily_papers(
                topic, query=keyword, max_results=max_results
            )
            table_batches.append(table_rows)
            web_batches.append(web_rows)
            print("\n")
        logging.info(f"GET daily papers end")

    # One entry per output, processed in this order:
    # 1. README.md, 2. docs/index.md (to gitpage), 3. docs/wechat.md
    outputs = (
        (
            publish_readme,
            "json_readme_path",
            "md_readme_path",
            table_batches,
            dict(task="Update Readme", show_badge=show_badge),
        ),
        (
            publish_gitpage,
            "json_gitpage_path",
            "md_gitpage_path",
            table_batches,
            dict(
                task="Update GitPage",
                to_web=True,
                show_badge=show_badge,
                use_tc=True,
                use_b2t=True,
            ),
        ),
        (
            publish_wechat,
            "json_wechat_path",
            "md_wechat_path",
            web_batches,
            dict(
                task="Update Wechat",
                to_web=False,
                use_title=False,
                show_badge=show_badge,
            ),
        ),
    )

    for enabled, json_key, md_key, batches, md_options in outputs:
        if not enabled:
            continue
        # paths are looked up only for enabled outputs
        json_file = config[json_key]
        md_file = config[md_key]
        if refresh_links:
            # update paper links
            update_paper_links(json_file)
        else:
            # update json data
            update_json_file(json_file, batches)
        # json data to markdown
        json_to_md(json_file, md_file, **md_options)
540 |
541 |
if __name__ == "__main__":
    # Command-line entry point: read the YAML config, let the command-line
    # switch decide whether only the paper links are refreshed, then run.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--config_path",
        type=str,
        default="config.yaml",
        help="configuration file path",
    )
    cli.add_argument(
        "--update_paper_links",
        action="store_true",
        default=False,
        help="whether to update paper links etc.",
    )
    options = cli.parse_args()

    settings = load_config(options.config_path)
    # the command-line value replaces any "update_paper_links" key from the file
    settings = {**settings, "update_paper_links": options.update_paper_links}
    demo(**settings)
557 |
--------------------------------------------------------------------------------