"""Search and download CVPR papers from the CVF open-access listing."""
import os
import re
from typing import List
from urllib.parse import urljoin

# NOTE: `requests` and `tqdm` are imported inside the functions that need them,
# so that parsing and searching stay usable without network dependencies.

CVF_URL = "https://openaccess.thecvf.com/"

# NOTE(review): the HTML tag literals of the two scraping patterns were lost in
# the copy of this file under review. They are reconstructed from the slice
# offsets the original parser relied on (a 10-character `[<a href="` prefix and
# 13-character `author    = {` prefix) -- confirm against the live page.
_ENTRY_PATTERN = re.compile(r"<dd>\s*\[.*?</dd>", re.DOTALL)
_PDF_LINK_PATTERN = re.compile(r'\[<a href="([^"]+)">pdf</a>\]')
_BIBREF_PATTERN = re.compile(r'<div class="bibref[^"]*">(.*?)</div>', re.DOTALL)

# Characters that cannot appear in a file name on common platforms.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(name):
    """Return *name* with path separators and other unsafe characters replaced."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", name).strip().rstrip(".")
    return cleaned or "untitled"


def _bibtex_field(bibtex, name):
    """Return the brace-delimited value of BibTeX field *name*, or None."""
    match = re.search(
        r"^\s*" + re.escape(name) + r"\s*=\s*\{(.*)\},?[ \t]*$",
        bibtex,
        re.MULTILINE,
    )
    return match.group(1).strip() if match else None


def _parse_listing(html):
    """Extract ``(pdf_url, authors, title)`` tuples from a listing page.

    Entries lacking a PDF link or a BibTeX author/title are skipped so the
    three result columns always stay aligned.
    """
    # The original implementation round-tripped the page through a text-mode
    # file, which normalised line endings; keep that normalisation explicit.
    html = html.replace("\r\n", "\n").replace("\r", "\n")
    papers = []
    for entry in _ENTRY_PATTERN.findall(html):
        pdf = _PDF_LINK_PATTERN.search(entry)
        bibref = _BIBREF_PATTERN.search(entry)
        if pdf is None or bibref is None:
            continue
        authors = _bibtex_field(bibref.group(1), "author")
        title = _bibtex_field(bibref.group(1), "title")
        if authors is None or title is None:
            continue
        # urljoin handles both absolute ("/content/...") and relative hrefs and
        # avoids the doubled slash produced by plain concatenation.
        papers.append((urljoin(CVF_URL, pdf.group(1)), authors, title))
    return papers


def download_file(url, dir, filename):
    """Download *url* and save it as ``<dir>/<filename>.pdf``.

    Characters in *filename* that are not valid in a file name (for example
    ``/`` in a paper title) are replaced with ``_``.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never saved under a ``.pdf`` name.
    """
    import requests

    response = requests.get(url, allow_redirects=True, timeout=60)
    response.raise_for_status()
    path = os.path.join(dir, f"{_safe_filename(filename)}.pdf")
    with open(path, "wb") as fh:
        fh.write(response.content)


def mkdir(dir):
    """Create directory *dir* (including parents) if it does not exist yet."""
    os.makedirs(dir, exist_ok=True)


class CVPRHelper:
    """Index of the papers of one CVPR edition.

    Attributes are three parallel lists, one item per paper:
    ``urls`` (PDF links), ``authors`` and ``titles`` (BibTeX field values).
    """

    def __init__(self, year) -> None:
        """Fetch and parse the open-access listing for CVPR *year*.

        Raises:
            requests.HTTPError: if the listing page cannot be retrieved.
        """
        import requests

        self.year = str(year)
        response = requests.get(f"{CVF_URL}CVPR{year}?day=all", timeout=60)
        response.raise_for_status()
        self._load(response.text)

    @classmethod
    def from_html(cls, year, html) -> "CVPRHelper":
        """Build a helper from already-downloaded listing *html* (no network)."""
        helper = cls.__new__(cls)
        helper.year = str(year)
        helper._load(html)
        return helper

    def _load(self, html) -> None:
        """Populate ``urls``, ``authors`` and ``titles`` from listing *html*."""
        papers = _parse_listing(html)
        self.urls = [url for url, _, _ in papers]
        self.authors = [authors for _, authors, _ in papers]
        self.titles = [title for _, _, title in papers]

    def search_keyword(self, kw) -> List[int]:
        """Return indices of papers whose title contains *kw* (case-insensitive).

        Also prints the number of matches.
        """
        needle = kw.lower()
        result = [idx for idx, title in enumerate(self.titles)
                  if needle in title.lower()]
        print(f"found {len(result)} papers")
        return result

    def download_paper(self, idx, save_to):
        """Download paper number *idx* into directory *save_to*."""
        download_file(self.urls[idx], save_to, filename=self.titles[idx])

    def download_keyword(self, kw):
        """Download every paper matching *kw* into ``./CVPR<year>-<kw>/``."""
        from tqdm import tqdm

        download_dir = f"./CVPR{self.year}-{kw}/"
        mkdir(download_dir)
        paper_idx_list = self.search_keyword(kw)
        bar = tqdm(paper_idx_list)
        for paper_idx in bar:
            # Label the bar before the download so it names the paper that is
            # currently being fetched rather than the one just finished.
            bar.set_description(
                f"Downloading \"{self.titles[paper_idx][:10]}...\"")
            self.download_paper(paper_idx, download_dir)
"""Command-line entry point: download every CVPR paper matching a keyword."""
import sys

# Edition used when no year is given on the command line.
DEFAULT_YEAR = 2021


def parse_args(argv):
    """Return ``(keyword, year)`` parsed from an ``sys.argv``-style list.

    ``argv[0]`` is the program name, ``argv[1]`` the keyword and the optional
    ``argv[2]`` the conference year (defaults to ``DEFAULT_YEAR``).

    Raises:
        SystemExit: with a usage message when the keyword is missing or the
            year is not an integer.
    """
    prog = argv[0] if argv else "downloader.py"
    usage = f"usage: {prog} KEYWORD [YEAR]"
    if len(argv) < 2:
        raise SystemExit(usage)
    if len(argv) > 2:
        try:
            year = int(argv[2])
        except ValueError:
            raise SystemExit(usage) from None
    else:
        year = DEFAULT_YEAR
    return argv[1], year


def main(argv=None):
    """Parse the command line, then search for and download matching papers."""
    kw, year = parse_args(sys.argv if argv is None else argv)
    # Imported only after the arguments are validated so a usage error is
    # reported without loading the scraper and its dependencies.
    from CVPRHelper import CVPRHelper

    helper = CVPRHelper(year)
    print(f"Searching for \"{kw}\"...")
    helper.download_keyword(kw)


if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JamesQFreeman/CVFPaperHelper/a4d54764a7daf2a27bb46c5db23178e1796041b9/wordcloud.jpg --------------------------------------------------------------------------------