├── .github
│   └── workflows
│       ├── first-github-action.yml
│       ├── python-app.yml
│       └── share-data.yml
├── LICENSE
├── README.md
├── crawling_yes24.py
├── github_utils.py
├── hello.py
├── main.py
└── requirements.txt
/.github/workflows/first-github-action.yml:
--------------------------------------------------------------------------------
# This workflow runs hello.py on a matrix of Python versions.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python package

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so YAML keeps them as strings (an unquoted 3.10
        # would be parsed as the number 3.1). End-of-life interpreters such as
        # 3.5/3.6 are not installable on current ubuntu-latest images.
        python-version: ["3.9", "3.10", "3.11", "3.12"]

    steps:
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Run Python
      run: |
        python3 hello.py
29 |
--------------------------------------------------------------------------------
/.github/workflows/python-app.yml:
--------------------------------------------------------------------------------
name: yes24_crawler

on:
  schedule:
    # Cron is evaluated in UTC: 00:00 UTC is 09:00 in Korea (KST).
    - cron: '0 0 * * *'

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Quoted so the version stays a string rather than a YAML float.
        python-version: "3.9"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Run main.py
      run: |
        python main.py
      env:
        # main.py reads this variable to authenticate against the GitHub API.
        MY_GITHUB_TOKEN: ${{ secrets.MY_GITHUB_TOKEN }}
25 |
--------------------------------------------------------------------------------
/.github/workflows/share-data.yml:
--------------------------------------------------------------------------------
name: Share data between jobs

on: [push]

jobs:
  job_1:
    name: Add 3 and 7
    runs-on: ubuntu-latest
    steps:
      - shell: bash
        run: |
          expr 3 + 7 > math-homework.txt
      - name: Upload math result for job 1
        uses: actions/upload-artifact@v4
        with:
          name: homework
          path: math-homework.txt

  job_2:
    name: Multiply by 9
    needs: job_1
    runs-on: windows-latest
    steps:
      - name: Download math result for job 1
        uses: actions/download-artifact@v4
        with:
          name: homework
          # v4 extracts a named artifact straight into `path` (workspace root
          # by default); keep the homework/ directory the script below reads.
          path: homework
      - shell: bash
        run: |
          value=`cat homework/math-homework.txt`
          expr $value \* 9 > homework/math-homework.txt
      - name: Upload math result for job 2
        uses: actions/upload-artifact@v4
        with:
          name: homework
          path: homework/math-homework.txt
          # Artifacts are immutable in v4; explicitly replace job_1's upload.
          overwrite: true

  job_3:
    name: Display results
    needs: job_2
    runs-on: macOS-latest
    steps:
      - name: Download math result for job 2
        uses: actions/download-artifact@v4
        with:
          name: homework
          # Same layout as in job_2 so the path used below stays valid.
          path: homework
      - name: Print the final result
        shell: bash
        run: |
          value=`cat homework/math-homework.txt`
          echo The result is $value
52 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Sung Yun Byeon
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Github Action with Python
2 | - (1) YES24 cron Github Action
3 | - YES24 IT 신간 도서에 있는 TOP 40을 가져와서, 해당 Github Issue 업로드
4 | - 매일 오전 9시에 업로드(한국 시간)
5 | - Watch 클릭시 이메일로 알람받을 수 있음
6 | - (2) hello.py 실행하는 Github Action
7 | - Master로 Push할 때 실행
8 | - (3) Job에서 생성한 파일을 공유해서 사용하는 Github Action
9 | - [upload-artifact](https://github.com/actions/upload-artifact)와 [download-artifact](https://github.com/actions/download-artifact) 사용 예제
10 |
--------------------------------------------------------------------------------
/crawling_yes24.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 |
4 |
def parsing_beautifulsoup(url, timeout=10):
    """
    Fetch a page over HTTP and parse it with BeautifulSoup.

    :param url: URL to fetch and parse (here, a YES24 listing page)
    :param timeout: seconds to wait for the server before giving up
    :return: BeautifulSoup object built from the response body
    :raises requests.RequestException: on connection failure, timeout,
        or a 4xx/5xx response status
    """

    # Without a timeout requests waits indefinitely, so an unresponsive
    # server would leave the caller hanging.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page as if
    # it were the requested document.
    response.raise_for_status()

    return BeautifulSoup(response.text, 'html.parser')
16 |
17 |
def extract_book_data(soup):
    """
    Build the issue body from the book entries of a parsed listing page.

    Every element matching ``.goodsTxtInfo`` becomes one line of the form
    ``<a href="URL">TITLE</a>, PRICE<br/>``. Entries that lack the expected
    links or price element are skipped.

    :param soup: BeautifulSoup soup Object
    :return: contents(str); an empty string when no entry is found
    """
    from html import escape
    from urllib.parse import urljoin

    url_prefix = "http://www.yes24.com"
    lines = []

    for new_book in soup.select(".goodsTxtInfo"):
        anchors = new_book.select("a")
        prices = new_book.select(".priceB")
        # The title is read from the first link and the product URL from the
        # second; skip entries that do not have both plus a price.
        if len(anchors) < 2 or not prices:
            continue

        href = anchors[1].attrs.get('href')
        if not href:
            continue

        book_name = anchors[0].text.strip()
        price = prices[0].text.strip()
        # urljoin handles both site-relative and already-absolute hrefs.
        url = urljoin(url_prefix, href)

        # Scraped text is escaped so stray markup characters cannot break
        # the generated HTML line.
        lines.append(
            f'<a href="{escape(url, quote=True)}">'
            f'{escape(book_name, quote=False)}</a>, '
            f'{escape(price, quote=False)}<br/>\n'
        )

    return "".join(lines)
39 |
--------------------------------------------------------------------------------
/github_utils.py:
--------------------------------------------------------------------------------
1 | from github import Github
2 |
3 |
def get_github_repo(access_token, repository_name):
    """
    Look up one of the authenticated user's repositories.

    :param access_token: Github access token
    :param repository_name: name of the repository to fetch
    :return: repo object returned by the Github client
    """
    client = Github(access_token)
    # The repository is resolved through the user the token authenticates as.
    authenticated_user = client.get_user()
    return authenticated_user.get_repo(repository_name)
13 |
14 |
def upload_github_issue(repo, title, body):
    """
    Create an issue in the given repository.

    :param repo: repo object (must provide ``create_issue``)
    :param title: issue title
    :param body: issue body
    :return: None
    """
    # Passed by keyword, exactly as create_issue is called with title/body.
    issue_fields = {"title": title, "body": body}
    repo.create_issue(**issue_fields)
24 |
25 |
26 |
--------------------------------------------------------------------------------
/hello.py:
--------------------------------------------------------------------------------
1 | print("Hello, World!")
2 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime
3 | from pytz import timezone
4 | from crawling_yes24 import parsing_beautifulsoup, extract_book_data
5 | from github_utils import get_github_repo, upload_github_issue
6 |
7 |
if __name__ == "__main__":
    # Read the token first: a missing variable raises KeyError before any
    # network request is made.
    access_token = os.environ['MY_GITHUB_TOKEN']
    repository_name = "github-action-with-python"

    # Date stamp for the issue title, taken in the Asia/Seoul timezone.
    today_date = datetime.now(timezone('Asia/Seoul')).strftime("%Y년 %m월 %d일")

    yes24_it_new_product_url = "https://www.yes24.com/Product/Category/AttentionNewProduct?pageNumber=1&pageSize=24&categoryNumber=001001003"

    # Fetch and parse the listing page.
    soup = parsing_beautifulsoup(yes24_it_new_product_url)

    issue_title = f"YES24 IT 신간 도서 알림({today_date})"
    upload_contents = extract_book_data(soup)

    # Publish the extracted contents as a new issue.
    upload_github_issue(
        get_github_repo(access_token, repository_name),
        issue_title,
        upload_contents,
    )
    print("Upload Github Issue Success!")
25 |
26 |
27 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.7.1
2 | requests==2.22.0
3 | PyGithub==1.51
4 | pytz==2019.1
--------------------------------------------------------------------------------