├── .env.sample
├── .gitignore
├── README.md
├── app.py
├── github_util.py
├── gpt.py
└── requirements.txt

/.env.sample:
--------------------------------------------------------------------------------
# environment variables defined inside a .env file
GITHUB_TOKEN=
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
.kvenv
.env
__pycache__/
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# gpt_playground
Using https://huggingface.co/kakaobrain/kogpt, released by Kakao, you can try the model simply by posting to an issue and getting the generated text back as a reply. GitHub issues are used so that everyone can see the results together.

The issue title is ignored; only the body is used as the prompt.

![image](https://user-images.githubusercontent.com/901975/142714843-1cc785f0-b16e-4638-a010-27999799bb6d.png)

I borrowed a single 3090 GPU from one of my lab students, so responses may be slow or unstable.
Thanks to Kakao for releasing the model.

### Quick setup
```bash
python3 -m venv .kvenv
source .kvenv/bin/activate
pip install -r requirements.txt
uvicorn app:app --reload

# expose the local server so GitHub's webhook can reach it
ngrok http 8000
```
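### Quick test
Once the server is running, you can sanity-check the model without going through GitHub by calling the `/prompt` endpoint directly (the prompt text here is just an illustration):
```bash
curl -G "http://localhost:8000/prompt" --data-urlencode "prompt=제주도 여행을 간다면"
```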
### Contributions
Code PRs are always welcome.
--------------------------------------------------------------------------------

/app.py:
--------------------------------------------------------------------------------
from fastapi import FastAPI, Request

from github_util import write_comment
from gpt import gpt

import time

app = FastAPI(title="GPT")

@app.get("/")
async def home():
    return "SS"

@app.get("/prompt")
def get_gpt(prompt: str):
    # Generate a completion for the given prompt and report the elapsed time.
    start = time.time()
    generated = gpt(prompt)
    end = time.time()
    print(end - start)

    return {"generated": generated, "time": end - start}

@app.post("/issue")
async def get_issue(request: Request):
    # GitHub issues webhook: reply to newly opened issues with generated text.
    content = await request.json()

    action = content['action']
    if action != 'opened':
        return -1  # nothing to do for other actions (edited, closed, ...)

    repos_name = content['repository']['full_name']
    issue = content['issue']

    no = issue['number']
    user = issue['user']['login']

    title = issue['title']
    body = issue['body']

    # The title is ignored; only the issue body is used as the prompt.
    res = gpt(body)

    print(repos_name, no, user, title, body, res)

    write_comment(res, repos_name, no)
    return 100
--------------------------------------------------------------------------------

/github_util.py:
--------------------------------------------------------------------------------
from github import Github
import logging
import time

from dotenv import load_dotenv
import os

load_dotenv()

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# os.getenv does not raise an exception, but returns None when unset
git_token = os.getenv('GITHUB_TOKEN')

if git_token is None:
    logger.error('Specify GITHUB_TOKEN as an environment variable.')
    exit(-1)

gh = Github(git_token)

# NOTE: unused leftover; `index` is not defined anywhere in this repo, and the
# actual webhook handling lives in app.py's /issue route.
def webhook(content):
    action = content['action']
    if action != 'opened':
        return -1  # Nothing to do

    repos_name = content['repository']['full_name']
    issue = content['issue']

    no = issue['number']
    user = issue['user']['login']

    title = issue['title']
    body = issue['body']

    start = time.time()
    n = index(title + "\n" + body, id=no, index_name=repos_name.replace('/', '_').lower())
    end = time.time()

    print(repos_name, no, user, title, body)

def write_comment(comment, repos_name, issueid):
    # Post `comment` on issue number `issueid` of the `repos_name` repository.
    repo = gh.get_repo(repos_name)

    thread_issue = repo.get_issue(number=int(issueid))
    if thread_issue:
        res = thread_issue.create_comment(comment)
        logger.info("Comment on " + str(res))
    return


if __name__ == '__main__':
    write_comment("Test", 'hunkim/digital-human', 1)
--------------------------------------------------------------------------------

/gpt.py:
--------------------------------------------------------------------------------
# code from https://huggingface.co/kakaobrain/kogpt
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Use the GPU when available; fp16 weights are poorly supported on CPU with
# older torch releases, so fall back to fp32 there.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

tokenizer = AutoTokenizer.from_pretrained(
    'kakaobrain/kogpt', revision='KoGPT6B-ryan1.5b',
    bos_token='[BOS]', eos_token='[EOS]', unk_token='[UNK]', pad_token='[PAD]', mask_token='[MASK]'
)
model = AutoModelForCausalLM.from_pretrained(
    'kakaobrain/kogpt', revision='KoGPT6B-ryan1.5b',
    pad_token_id=tokenizer.eos_token_id,
    torch_dtype=torch.float16 if device == 'cuda' else torch.float32,
    low_cpu_mem_usage=True
).to(device=device, non_blocking=True)
_ = model.eval()

print("Model loading done!")

def gpt(prompt):
    # Generate up to 256 tokens (prompt included) with sampling.
    with torch.no_grad():
        tokens = tokenizer.encode(prompt, return_tensors='pt').to(device=device, non_blocking=True)
        gen_tokens = model.generate(tokens, do_sample=True, temperature=0.8, max_length=256)
        generated = tokenizer.batch_decode(gen_tokens)[0]

    return generated
--------------------------------------------------------------------------------

/requirements.txt:
--------------------------------------------------------------------------------
torch~=1.9.0
transformers~=4.12.0

fastapi
uvicorn[standard]

python-dotenv
PyGithub
requests
--------------------------------------------------------------------------------
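To exercise the full issue flow without waiting for GitHub, you can replay a minimal "issue opened" payload against the local server. This is a sketch: it includes only the fields the `/issue` handler reads, the repository and issue number are placeholders (borrowed from the test in github_util.py), and a successful run posts a real comment, so point it at an issue you own.
```bash
curl -X POST "http://localhost:8000/issue" \
  -H "Content-Type: application/json" \
  -d '{
        "action": "opened",
        "repository": {"full_name": "hunkim/digital-human"},
        "issue": {
          "number": 1,
          "user": {"login": "hunkim"},
          "title": "test",
          "body": "카카오브레인의 KoGPT는"
        }
      }'
```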