├── .gitignore
├── CVPRHelper.py
├── README.md
├── downloader.py
├── example.gif
├── example.py
└── wordcloud.jpg
/.gitignore:
--------------------------------------------------------------------------------
1 | CVPR2021-contrastive/
2 | temp.html
3 | .DS_Store
4 |
--------------------------------------------------------------------------------
/CVPRHelper.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import os
3 | import re
4 | from typing import List
5 | from tqdm import tqdm
6 |
7 |
8 | CVF_URL = "https://openaccess.thecvf.com/"
9 |
10 |
def download_file(url, dir, filename):
    """Download ``url`` and save the response body as ``<dir>/<filename>.pdf``.

    Args:
        url: Address of the file to fetch; redirects are followed.
        dir: Existing directory to write the file into.
        filename: Base name of the output file, without the ``.pdf`` suffix.
            Path separators in it are replaced with ``_`` so that a name
            such as ``"2D/3D Fusion"`` cannot point into a sub-directory.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = requests.get(url, allow_redirects=True)
    # Fail loudly rather than saving an HTML error page under a .pdf name.
    r.raise_for_status()
    safe_name = filename.replace("/", "_").replace(os.sep, "_")
    # The context manager guarantees the handle is flushed and closed.
    with open(os.path.join(dir, f"{safe_name}.pdf"), "wb") as f:
        f.write(r.content)
14 |
15 |
def mkdir(dir):
    """Create directory ``dir`` if it does not exist yet.

    Missing parent directories are created as well, and calling this on a
    directory that already exists is a no-op.

    Args:
        dir: Path of the directory to create.

    Raises:
        FileExistsError: If ``dir`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(dir, exist_ok=True)
19 |
20 |
21 | class CVPRHelper:
    def __init__(self, year) -> None:
        """Fetch the CVPR open-access listing for ``year`` and index its papers.

        Stores the year as a string in ``self.year`` and fills three parallel
        lists with one entry per matched paper: ``self.urls``,
        ``self.authors`` and ``self.titles``.

        Side effect: the downloaded page is written to ``temp.html`` in the
        current working directory and left there.

        Args:
            year: Conference year, interpolated into the listing URL.
        """
        self.year = str(year)
        webpage = requests.get(
            f"https://openaccess.thecvf.com/CVPR{year}?day=all").text
        # The page is written to temp.html and immediately read back.
        open('temp.html', 'w').write(webpage)
        webpage = open('temp.html').read()
        # NOTE(review): this pattern literal looks truncated in this copy of
        # the file (it spans two lines and is not valid Python as shown);
        # presumably HTML tags were stripped from it -- restore from upstream.
        pattern = "
\\n\[.*?"
        pattern = re.compile(pattern, re.DOTALL)
        # One raw text chunk per paper, then each chunk split into lines.
        paper_list = re.findall(pattern, webpage)
        paper_list_in_lines = [raw.split('\n') for raw in paper_list]

        # NOTE(review): this literal also looks truncated -- see note above.
        bibex_pattern = re.compile(
            ".*?
", re.DOTALL)
        # First bibex_pattern match inside each paper chunk, split into lines.
        bibex_list_in_lines = [re.findall(bibex_pattern, raw)[
            0].split('\n') for raw in paper_list]

        # The URL is the second line of each chunk with 10 characters trimmed
        # from both ends, appended to CVF_URL.
        self.urls = [CVF_URL+lines[1][10:-10] for lines in paper_list_in_lines]
        # Authors/titles come from the 2nd/3rd line of the second match; the
        # [13:-2] slice presumably drops a BibTeX "key = {" prefix and "},"
        # suffix -- TODO confirm against the page markup.
        self.authors = [lines[1].strip()[13:-2]
                        for lines in bibex_list_in_lines]
        self.titles = [lines[2].strip()[13:-2]
                       for lines in bibex_list_in_lines]
43 |
44 | def search_keyword(self, kw) -> List[int]:
45 | result = []
46 | for idx, title in enumerate(self.titles):
47 | if kw.lower() in title.lower():
48 | result.append(idx)
49 | print(f"found {len(result)} papers")
50 | return result
51 |
52 | def download_paper(self, idx, save_to):
53 | download_file(self.urls[idx], save_to, filename=self.titles[idx])
54 |
55 | def download_keyword(self, kw):
56 | download_dir = f"./CVPR{self.year}-{kw}/"
57 | mkdir(download_dir)
58 | paper_idx_list = self.search_keyword(kw)
59 | bar = tqdm(paper_idx_list)
60 | for paper_idx in bar:
61 | self.download_paper(paper_idx, download_dir)
62 | bar.set_description(
63 | f"Downloading \"{self.titles[paper_idx][:10]}...\"")
64 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CVFPaperHelper
2 | Automatically download CVPR papers whose titles contain a given keyword.
3 |
4 | ## Install
5 | ```bash
6 | mkdir PapersToRead
7 | cd PapersToRead
8 | pip install requests tqdm
9 | git clone https://github.com/JamesQFreeman/CVFPaperHelper.git
10 | ```
11 |
12 | ## Usage
13 | From a shell, run the downloader with a keyword, e.g. `python downloader.py contrastive`:
14 |
15 | 
16 |
17 | ## Or use it as a class
18 | ```python
19 | def download():
20 | helper = CVPRHelper(2021)
21 | helper.download_keyword('generative')
22 |
23 |
24 | def search():
25 | helper = CVPRHelper(2021)
26 | for id in helper.search_keyword('generative'):
27 | print(helper.titles[id])
28 |
29 |
30 | def fancy_word_cloud():
31 | helper = CVPRHelper(2021)
32 | text = ' '.join(helper.titles)
33 | wc = WordCloud(background_color="white", height=800, width=1600)
34 | wc.generate(text)
35 | plt.axis("off")
36 | plt.imshow(wc, interpolation="bilinear")
37 | plt.show()
38 | ```
39 | For example, `fancy_word_cloud` builds a word cloud from all paper titles:
40 |
41 | 
42 |
43 | ## Have a good time!
44 |
--------------------------------------------------------------------------------
/downloader.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from CVPRHelper import CVPRHelper
3 |
4 |
if __name__ == '__main__':
    # Command-line entry point: download all CVPR 2021 papers whose title
    # contains the keyword given as the first argument.
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"usage: python {sys.argv[0]} KEYWORD")
    kw = sys.argv[1]
    helper = CVPRHelper(2021)
    print(f"Searching for \"{kw}\"...")
    helper.download_keyword(kw)
10 |
--------------------------------------------------------------------------------
/example.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesQFreeman/CVFPaperHelper/a4d54764a7daf2a27bb46c5db23178e1796041b9/example.gif
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
1 | from CVPRHelper import CVPRHelper
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from wordcloud import WordCloud
5 |
6 |
def download():
    """Download every CVPR 2021 paper whose title contains 'generative'."""
    CVPRHelper(2021).download_keyword('generative')
10 |
11 |
def search():
    """Print the title of every CVPR 2021 paper matching 'generative'."""
    helper = CVPRHelper(2021)
    matches = helper.search_keyword('generative')
    for paper_idx in matches:
        print(helper.titles[paper_idx])
16 |
17 |
def fancy_word_cloud():
    """Display a word cloud built from all CVPR 2021 paper titles."""
    cvpr = CVPRHelper(2021)
    all_titles = ' '.join(cvpr.titles)
    cloud = WordCloud(width=1600, height=800, background_color="white")
    cloud.generate(all_titles)
    plt.axis("off")
    plt.imshow(cloud, interpolation="bilinear")
    plt.show()
    # plt.savefig('wordcloud.jpg', dpi=500)
27 |
28 |
# Only build the word cloud when run as a script, so that importing this
# module does not trigger a network fetch and open a plot window.
if __name__ == '__main__':
    fancy_word_cloud()
30 |
--------------------------------------------------------------------------------
/wordcloud.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesQFreeman/CVFPaperHelper/a4d54764a7daf2a27bb46c5db23178e1796041b9/wordcloud.jpg
--------------------------------------------------------------------------------