├── runtime.txt
├── scrapers
│   ├── __init__.py
│   ├── WebScraper.py
│   └── GithubScraper.py
├── .gitignore
├── requirements.txt
├── .github
│   └── workflows
│       ├── update_csv_files.yml
│       └── website.yml
├── main.py
├── site
│   ├── index.html
│   └── static
│       └── index.html
├── README.md
└── charts_data
    ├── participants_growth.ipynb
    └── findings_value.ipynb

/runtime.txt:
--------------------------------------------------------------------------------
python-3.8
--------------------------------------------------------------------------------
/scrapers/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.env
__pycache__/
repos_data/
*.ipynb_*
*.log
*.sqlite
*.xlsx
*.sublime*
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
altair>=4.2.0
bs4>=0.0.1
GitPython>=3.1.27
jupyter==1.0.0
lxml>=4.6.3
nbconvert==7.2.2
pandas>=1.4.3
python-dotenv>=0.20.0
requests>=2.26.0
requests_cache>=0.9.5
selenium>=4.3.0
webdriver-manager>=3.8.3
--------------------------------------------------------------------------------
/.github/workflows/update_csv_files.yml:
--------------------------------------------------------------------------------
# This is a basic workflow to help you get started with Actions

name: Update scraped data

# Controls when the workflow will run
on:
  schedule:
    - cron: "0 12 * * *"
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "scrape"
  scrape:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest
    env:
      API_ACCESS_TOKEN: ${{ secrets.API_ACCESS_TOKEN }}
      GH_TOKEN: ${{ secrets.API_ACCESS_TOKEN }}

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4.2.0

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run scraping script
        run: python main.py all

      - name: Git Auto Commit
        uses: stefanzweifel/git-auto-commit-action@v4.14.1
        with:
          commit_message: Updated all scraped data (CSV)
          file_pattern: '*.csv'
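
Note: besides the daily 12:00 UTC schedule, the workflow above declares workflow_dispatch, so a run can also be started by hand. A minimal sketch using the GitHub CLI (an assumption, not part of the repository; it requires gh to be installed and authenticated against this repo, and uses the workflow's name: field):

    gh workflow run "Update scraped data"   # queue a manual run of the scraping workflow
    gh run watch                            # optionally follow the run it starts
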
--------------------------------------------------------------------------------
/.github/workflows/website.yml:
--------------------------------------------------------------------------------
name: Build website

on:
  push:
    branches:
      - master
    paths:
      - '**.csv'
  workflow_dispatch:

permissions:
  contents: write
  pages: write
  id-token: write

# Allow one concurrent deployment
concurrency:
  group: "pages"
  cancel-in-progress: true

jobs:
  # Single deploy job since we're just deploying
  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Setup Pages
        uses: actions/configure-pages@v2
      - uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip uninstall rfc3986-validator -y
      - name: Update analysis notebooks
        run: |
          for filename in charts_data/*.ipynb; do
            jupyter nbconvert --to notebook --execute $filename --ExecutePreprocessor.kernel_name='python3' --inplace
          done
      - name: Convert notebooks to HTML
        run: |
          mkdir -p site/static
          for filename in charts_data/*.ipynb; do
            jupyter nbconvert --to html $filename
            mv ${filename%.*}.html site/static/
          done
      - name: Install and build index
        run: |
          sudo apt-get update
          sudo apt-get install curl git -y
          curl https://raw.githubusercontent.com/jayanta525/apindex-v2/master/sudo-install.sh | bash
          cd site/
          apindex .
      - name: Upload artifacts
        uses: actions/upload-pages-artifact@v1
        with:
          path: 'site/'
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v1
      - name: Commit
        uses: stefanzweifel/git-auto-commit-action@v4
        with:
          commit_message: Automated static html notebooks build
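
Note: the two nbconvert loops in the workflow above can be reproduced locally to preview the generated pages before pushing. The sketch below simply mirrors those steps and assumes jupyter/nbconvert from requirements.txt are installed:

    mkdir -p site/static
    for filename in charts_data/*.ipynb; do
        # re-execute each notebook in place, then render it to HTML for site/static/
        jupyter nbconvert --to notebook --execute "$filename" --ExecutePreprocessor.kernel_name='python3' --inplace
        jupyter nbconvert --to html "$filename"
        mv "${filename%.*}.html" site/static/
    done
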
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import logging
import sys

from dotenv import load_dotenv
from scrapers.WebScraper import *
from scrapers.GithubScraper import *

load_dotenv()

def scrape(scrape_method, scrape_data_desc, url, csv_file=None):
    logging.info(f"Starting {scrape_data_desc} data scraping at '{url}'...")
    df = scrape_method(url)

    if (csv_file):
        df.to_csv(csv_file, index=False)

    logging.info(f"Finished {scrape_data_desc} data scraping: got {len(df.index)} rows of data [success]")
    return df

if __name__ == "__main__":
    file_handler = logging.FileHandler("code4rena.log", mode='w', encoding='utf8')
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    logging.basicConfig(
        handlers=[file_handler, console_handler],
        level=logging.DEBUG,
        format='%(module)s:T+%(relativeCreated)d\t%(levelname)s %(message)s'
    )
    logging.getLogger('selenium').setLevel(logging.WARNING)  # Prevent the log file from being filled with Selenium debug output

    logging.addLevelName(logging.DEBUG, '[DEBUG]')
    logging.addLevelName(logging.INFO, '[*]')
    logging.addLevelName(logging.WARNING, '[!]')
    logging.addLevelName(logging.ERROR, '[ERROR]')
    logging.addLevelName(logging.CRITICAL, '[CRITICAL]')

    leaderboard_url = "https://code4rena.com/leaderboard"
    leaderboard_csv_file = 'leaderboard_code4rena.csv'

    contests_url = "https://code4rena.com/contests"
    contests_csv_file = 'contests_code4rena.csv'

    github_org = "code-423n4"
    github_csv_file = 'github_code4rena.csv'

    github_scraper = GithubScraper(console_handler)
    target = sys.argv[1].lower()  # TODO: Parse command line arguments

    if (target == 'github'):
        scrape(github_scraper.scrape_repos, "Github repos", github_org, github_csv_file)
    else:
        web_scraper = WebScraper(console_handler)  # Initialize the Selenium driver only if needed
        if (target == 'leaderboard'):
            scrape(web_scraper.scrape_leaderboard_table, "Code4rena leaderboard", leaderboard_url, leaderboard_csv_file)
        elif (target == 'contests'):
            scrape(web_scraper.scrape_contests_data, "Code4rena contests", contests_url, contests_csv_file)
        else:
            scrape(web_scraper.scrape_leaderboard_table, "Code4rena leaderboard", leaderboard_url, leaderboard_csv_file)
            scrape(web_scraper.scrape_contests_data, "Code4rena contests", contests_url, contests_csv_file)
            scrape(github_scraper.scrape_repos, "Github repos", github_org, github_csv_file)
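
Note: main.py reads a single positional argument to pick a scrape target; anything other than 'github', 'leaderboard' or 'contests' falls through to the branch that runs every scraper, which is why the workflow passes 'all'. A hedged sketch of local invocations (assuming a .env file supplies the API_ACCESS_TOKEN that the GitHub scraping presumably requires):

    pip install -r requirements.txt
    python main.py leaderboard   # writes leaderboard_code4rena.csv
    python main.py contests      # writes contests_code4rena.csv
    python main.py github        # writes github_code4rena.csv
    python main.py all           # any other value: run all three scrapers
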
--------------------------------------------------------------------------------
/site/index.html:
--------------------------------------------------------------------------------
<!-- apindex-generated directory listing; the original markup did not survive extraction.
     Recoverable content: a table with "Filename", "Size" and "Last Modified" columns,
     whose captured rows show a size of "-" and a last-modified date of 06-Jan-2023 16:00. -->