├── 00.setenv.sh ├── 01.build.sh ├── 03.deploy.sh ├── run.sh ├── src ├── templates │ ├── index.html │ └── base.html ├── Dockerfile ├── summarize_web.py └── webhook.py ├── 02.test_local.sh ├── docs └── index.html └── README.md /00.setenv.sh: -------------------------------------------------------------------------------- 1 | PROJECT= 2 | MODULE= 3 | -------------------------------------------------------------------------------- /01.build.sh: -------------------------------------------------------------------------------- 1 | . ./00.setenv.sh 2 | cd src 3 | gcloud --project $PROJECT builds submit --tag gcr.io/$PROJECT/$MODULE 4 | -------------------------------------------------------------------------------- /03.deploy.sh: -------------------------------------------------------------------------------- 1 | . ./00.setenv.sh 2 | gcloud --project $PROJECT run deploy --image gcr.io/$PROJECT/$MODULE --platform managed --allow-unauthenticated --region asia-northeast3 $MODULE 3 | 4 | URL=`gcloud --project $PROJECT run services list | grep $MODULE | awk '{print $4}'` 5 | 6 | curl -X POST -d "url=$URL" $URL/setWebhook 7 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | # Run python source locally 2 | 3 | ps -ef | grep ngrok | grep -v grep | awk '{print "kill " $2}' | sh -x 4 | sleep 2 5 | rm nohup.out 6 | nohup ../ngrok http 5000 --log=stdout & 7 | sleep 2 8 | URL=`cat nohup.out | grep https | awk '{print $8}' | sed -e 's/url=//g'` 9 | export URL 10 | python src/webhook.py 11 | -------------------------------------------------------------------------------- /src/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block contents %} 4 | {% if title %} 5 |

Summary

6 |

Title: {{ title }}

7 |

{{ content }}

8 | {% endif %} 9 | {% if structure %} 10 |

Structure

11 |
{{ structure }}
12 | {% endif %} 13 | {% endblock contents %} 14 | -------------------------------------------------------------------------------- /src/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | News Summarizer 4 | 5 | 6 | 7 |

News Summarizer

8 |
9 | 10 | 11 |
12 | 13 | {% block contents %} 14 | {% endblock contents %} 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /02.test_local.sh: -------------------------------------------------------------------------------- 1 | . ./00.setenv.sh 2 | 3 | docker pull gcr.io/$PROJECT/$MODULE 4 | docker run --rm --name $MODULE -d -p 8080:8080 gcr.io/$PROJECT/$MODULE 5 | 6 | ps -ef | grep ngrok | grep -v grep | awk '{print "kill " $2}' | sh -x 7 | sleep 2 8 | rm nohup.out 9 | nohup ../ngrok http 8080 --log=stdout & 10 | sleep 2 11 | URL=`cat nohup.out | grep https | awk '{print $8}' | sed -e 's/url=//g'` 12 | 13 | sleep 1 14 | docker logs -f $MODULE & 15 | curl -X POST -d "url=$URL" http://localhost:8080/setWebhook 16 | echo "" 17 | 18 | sleep 1 19 | echo Enter to stop... 20 | read a 21 | 22 | docker stop $MODULE 23 | 24 | -------------------------------------------------------------------------------- /src/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official lightweight Python image. 2 | # https://hub.docker.com/_/python 3 | FROM python:3.8-slim 4 | 5 | # Copy local code to the container image. 6 | ENV APP_HOME /app 7 | WORKDIR $APP_HOME 8 | COPY . ./ 9 | 10 | # Install production dependencies. 11 | RUN pip install Flask gunicorn openai google-cloud-translate transformers firebase-admin google-cloud-speech pydub 12 | RUN pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib 13 | RUN apt update 14 | RUN apt install -y ffmpeg 15 | 16 | ENV GOOGLE_APPLICATION_CREDENTIALS=/app/sa-key.json 17 | 18 | # Run the web service on container startup. Here we use the gunicorn 19 | # webserver, with only one worker process and only one thread per worker. 20 | # For environments with multiple CPU cores, increase the number of workers 21 | # to be equal to the cores available. 
22 | CMD exec gunicorn --bind :8080 --workers 4 --threads 1 --timeout 0 webhook:app 23 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 |
 2 | Available commands:
 3 | 
 4 | /help
 5 |   This help page
 6 | 
 7 | /clear
 8 |   GPT에 전달하는 대화 히스토리(컨텍스트)를 삭제하고 새로운 토픽으로 대화합니다
 9 |   Clear all the history(contexts) that have been told and start new topic
10 | 
11 | /topic
12 |   현재 대화 중인 토픽을 표시합니다. 실제로는 GPT에게 "list up topics we have discussed" 질문을 던집니다
13 |   Display current topics. This is actually a question "list up topics we have discussed" to GPT model
14 | 
15 | /params
16 |   GPT 모델에 대한 하이퍼 파라미터 목록을 조회하거나 변경합니다. 추가적인 아규먼트가 없으면 다음과 같이 표시합니다
17 | 
18 |   (example) 
19 |   frequency_penalty: 0.5
20 |   presence_penalty: 0.5
21 |   max_tokens: 2048
22 |   temperature: 0.5
23 | 
24 |   다음과 같이 파라미터를 수정할 수 있습니다
25 |   /params frequency_penalty  
26 | 
27 |   다음과 같이 파라미터를 디폴트 값으로 변경합니다
28 |   /params reset
29 | 
30 | /system 
31 |   GPT 모델의 System role 메시지를 지정합니다. GPT 모델이 어떤 모드로 동작할지 지정합니다
32 |   디폴트는 "You are a helpful assistant" 입니다
33 |   예를 들어 한국어 챗봇처럼 답변하게 하고 싶으면 다음과 같이 입력합니다
34 |   (example) "You're a korean chatbot that is answering in Korean"
35 | 
36 | /translate 
Google Translator를 사용하여 질문(입력)을 영어로 번역하여 전달 후, 영어로 받은 응답을 한국어로 번역합니다
Using Google Translator, the bot translates your question into English for GPT, and translates the answer from GPT into the specified language
39 | 
40 | /history
41 |   현재 대화중인 메시지(GPT의 입력 토큰)를 표시합니다
42 |   Print messages(GPT input tokens)
43 | 
44 | /model 
45 |   현재 사용 중인 GPT 모델을 변경합니다
46 |   Change the bot's GPT model
47 |   
48 |   (example) /model gpt-3.5-turbo
49 |   (example) /model gpt-4
50 | 
51 | /sum 
52 |   주어진 URL의 웹페이지를 불러와 summary를 출력합니다
53 |   Read web page content from the URL and summarize it
54 | 
55 |   (example) /sum https://edition.cnn.com/2023/04/15/asia/taiwan-china-invasion-defense-us-weapons-intl-hnk-dst/index.html
56 | 
57 | /gpt3
58 |   GPT 3.5 모델로 변경합니다
59 | 
60 | /gpt4
61 |   GPT 4 모델로 변경합니다
62 | 
63 | /save
64 |   Save current message history and parameters to Firebase DB
65 | 
66 | /list
67 |   List up saved histories
68 | 
69 | /load 
70 |   Load saved history with 
71 | 
72 | 
73 | 
74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # telegram-gptbot 2 | 3 | ChatGPT를 편하게 불러쓸수 있게 텔레그램 봇에 openai API를 연동해서 개인적으로 쓰기 위한 Python 기반의 프로그램입니다.\ 4 | 메신저로 아무때나 쉽게 쓸수 있고 필요하면 다른 챗/채널에도 연결할 수 있어 편리합니다.\ 5 | 우선은 OpenAI에 가셔서 API 사용을 위한 유료 가입을 하셔야 합니다. API 비용은 API 호출당 charging으로 chatgpt용 모델의 경우 많이 저렴합니다.\ 6 | 제가 사용해 보니 비용은 ChatGPT 유료의 거의 1/10 수준인데 응답속도는 ChatGPT 프리미엄급입니다. 다만, ChatGPT 처럼 한자씩 typing 하는 효과는 없습니다.\ 7 | 간만에 코딩도 잼있있고 해서 혹시 궁금하신 분들을 위해 봇 소스 github에 공유합니다.\ 8 | 봇의 런타임은 Google Cloud의 Cloud Run에서 돌립니다. 서버 없이 바로 사용할 수 있어 편리하고, 컨테이너를 바로 HTTPS(SSL) endpoint를 만들 수 있으므로 봇 운영에 편리합니다. 9 | 10 | ## 소스 및 사용 방법 설명 11 | 12 | 0. Google Cloud 생성 13 | * 따로 안내는 하지 않겠습니다. 14 | * 봇의 webhook 호출을 받기 위한 서비스 목적으로, 만약 개인적으로 운영하는 서버가 있으면 ngrok으로 포트포워딩 하셔도 됩니다. 15 | 16 | 1. 텔레그램 챗봇 생성 17 | * 구글링 해 보면 너무 많습니다. BotFather 님에게 생성 요청하시면 됩니다. 18 | * 생성된 봇 Token을 소스에 넣으면 됩니다. 19 | 20 | 2. OpenAI API Key 21 | * 역시 구글링 해 보시면 가입하는 방법 나옵니다. 22 | 23 | 3. 소스 24 | * 단 하나의 소스 src/webhook.py 로 이루어져 있습니다. 25 | * 아주 심플한 Flask 웹프로그램에 심지어 코딩도 Copilot 친구의 도움 받아서 했기 때문에 중복 코드 등도 많습니다 ^^;; 26 | * 텔레그램 봇의 Token과 OpenAI API Key을 각자 받아서 넣으시면 됩니다. 27 | * 봇의 abusing을 막기 위해 제 텔레그램 Chat ID를 if 문으로 체크하는 부분이 있는데, 각자 ID가 다르므로 일단 remark로 막고 확인 후에 변경하시면 됩니다. 28 | 29 | 4. 실행 방법 - 01 ~ 03 차례로 실행하면 됩니다. 30 | * 00.setenv.sh - 컨테이너 이름, GCP Project ID 등 지정 31 | * 01.build.sh - 컨테이너 빌드(Cloud Build 사용) 32 | * 02.test_local.sh - 컨테이너를 Cloud Run으로 올리지 않고 개발 환경에서 바로 테스트하고자 할 경우 33 | * 03.deploy.sh - 컨테이너를 Cloud Run으로 배포 후에 텔레그램에 webhook endpoint로 셋업 34 | * run.sh - 컨테이너 없이 바로 Python 코드로 테스트 하고자 할 경우 사용하는 스크립트 35 | 36 | 5. 봇 커맨드 사용법 37 | * https://javalove93.github.io/telegram-gptbot/ 38 | 39 | ## Design Considerations 40 | 41 | * OpenAI GPT models have number of token limitation like other NLP models. gpt-3-turbo has 2048 for reuqest and 4096 for being shared between reqeust and response. 
So default max_tokens for gpt-3-turbo is 2048 that means your input can't exceed 2048 tokens where token size is usually slightly more than number of words. This uses transformers tokenizer to figure out token size before sending reqeust. 42 | * To help users to handle that token size limitation, this bot is to remove(forget) old conversation histories if new request(old history + new message) is bigger than max_tokens. If new message is still bigger than token limit, then it's tyring to remove later part of the message. 43 | 44 | ## Telegram ChatGPT bot usage example 45 | * It can be used as log file formatter in interative way !!!! 46 | * If you're good prompt engineer, you may use the prompt, "please remove DEFAULT and date from the text and make it to pretty format" 47 | ![image](https://user-images.githubusercontent.com/4580526/227698759-c812ea58-5502-4aef-95dd-818e5822c09a.png) 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /src/summarize_web.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import json 4 | import sys 5 | from langdetect import detect 6 | from flask import Flask, request, jsonify, render_template, url_for, redirect, flash, session 7 | import openai 8 | 9 | app = Flask(__name__) 10 | app.secret_key = 'your_secret_key_here' 11 | 12 | # OpenAI API Key 13 | openai_apikey = "YOUR_OPENAI_API_KEY" 14 | openai.api_key = openai_apikey 15 | 16 | def read_webpage(url): 17 | headers = { 18 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3', 19 | 'accpet': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 20 | # 'accept-encoding': 'gzip, deflate, br', 21 | 'referer': 'https://www.google.com/', 22 | 'accept-language': 'ko,en-US;q=0.9,en;q=0.8' 
23 | } 24 | 25 | response = requests.get(url, headers=headers) 26 | 27 | if response.status_code == 200: 28 | html = response.text 29 | soup = BeautifulSoup(html, 'html.parser') 30 | 31 | # get the title 32 | title = soup.select_one('title').get_text().strip() 33 | 34 | # get the language 35 | meta_tag = soup.find('meta', {'charset': True}) 36 | charset = meta_tag['charset'] 37 | 38 | # get the body 39 | body = soup.select_one('body') 40 | 41 | # list the first level children of the body 42 | children = body.findChildren() 43 | content = [] 44 | text_content = '' 45 | for child in children: 46 | if child.name in ['h1', 'h2', 'h3', 'h4', 'p']: 47 | content.append({ 48 | 'type': child.name, 49 | 'text': child.get_text().strip() 50 | }) 51 | text_content += child.get_text().strip() + '\n' 52 | 53 | language = detect(text_content) 54 | 55 | print("Charset: ", charset) 56 | print("Language: ", language) 57 | 58 | return { 59 | 'result': 'success', 60 | 'language': language, 61 | 'title': title, 62 | 'content': content 63 | } 64 | else: 65 | return { 66 | 'result': 'error', 67 | 'status_code': response.status_code, 68 | 'err_msg': response.text 69 | } 70 | 71 | def complete(messages): 72 | try: 73 | response = openai.ChatCompletion.create( 74 | model="gpt-3.5-turbo", 75 | messages=messages, 76 | temperature=0.1, 77 | max_tokens=1024, 78 | top_p=1, 79 | frequency_penalty=0.5, 80 | presence_penalty=0.5, 81 | stop=None 82 | ) 83 | 84 | return response['choices'][0]['message']['content'] 85 | except Exception as e: 86 | print(e) 87 | return 'Error' 88 | 89 | def count_sentences(text): 90 | length = 0 91 | for line in text.split('\n'): 92 | if line.strip() != '': 93 | for str in line.split('.'): 94 | if str.strip() != '': 95 | length += 1 96 | return length 97 | 98 | def summarize_content(title, content, language='en'): 99 | MIN_SECTIONS = 5 100 | SUM_RATIO_KOR = 5 101 | SUM_RATIO_ENG = 5 102 | SUM_RATIO_SUMMARY = 3 103 | MAX_CHUNK_SIZE = 500 104 | 105 | strcuture = { 106 | 
'titles': [{'title': title}], 107 | 'sections': [] 108 | } 109 | 110 | section_list = { 111 | 'h1': [], 112 | 'h2': [], 113 | 'h3': [], 114 | 'h4': [] 115 | } 116 | for item in content: 117 | if item['type'] in ['h1', 'h2', 'h3', 'h4']: 118 | section_list[item['type']].append(item.copy()) 119 | 120 | section_tag = None 121 | if len(section_list['h1']) >= MIN_SECTIONS: 122 | strcuture['sections'] = section_list['h1'] 123 | section_tag = 'h1' 124 | elif len(section_list['h2']) >= MIN_SECTIONS: 125 | strcuture['sections'] = section_list['h2'] 126 | strcuture['titles'].extend(section_list['h1']) 127 | section_tag = 'h2' 128 | elif len(section_list['h3']) >= MIN_SECTIONS: 129 | strcuture['sections'] = section_list['h3'] 130 | strcuture['titles'].extend(section_list['h1']) 131 | strcuture['titles'].extend(section_list['h2']) 132 | section_tag = 'h3' 133 | elif len(section_list['h4']) >= MIN_SECTIONS: 134 | strcuture['sections'] = section_list['h4'] 135 | strcuture['titles'].extend(section_list['h1']) 136 | strcuture['titles'].extend(section_list['h2']) 137 | strcuture['titles'].extend(section_list['h3']) 138 | section_tag = 'h4' 139 | else: 140 | if len(section_list['h1']) > 1: 141 | strcuture['sections'] = section_list['h1'] 142 | section_tag = 'h1' 143 | elif len(section_list['h2']) > 1: 144 | strcuture['sections'] = section_list['h2'] 145 | strcuture['titles'].extend(section_list['h1']) 146 | section_tag = 'h2' 147 | elif len(section_list['h3']) > 1: 148 | strcuture['sections'] = section_list['h3'] 149 | strcuture['titles'].extend(section_list['h1']) 150 | strcuture['titles'].extend(section_list['h2']) 151 | section_tag = 'h3' 152 | elif len(section_list['h4']) > 1: 153 | strcuture['sections'] = section_list['h4'] 154 | strcuture['titles'].extend(section_list['h1']) 155 | strcuture['titles'].extend(section_list['h2']) 156 | strcuture['titles'].extend(section_list['h3']) 157 | section_tag = 'h4' 158 | 159 | messages = [ 160 | {'role': 'system', 'content': 'You are a 
helpful assistant'} 161 | ] 162 | 163 | if language == 'ko': 164 | message = '글의 구조는 title > h1 > h2 > h3 > p 의 순서이다. 하나의 문장으로 제목을 추출하라:\n' 165 | else: 166 | message = 'While strcuture is title > h1 > h2 > h3 > h4 > p, please get a title as a sentence from:\n' 167 | for title in strcuture['titles']: 168 | if 'title' in title: 169 | message += "title: {}\n".format(title['title']) 170 | else: 171 | message += "{}: {}\n".format(title['type'], title['text']) 172 | 173 | messages.append({'role': 'user', 'content': message}) 174 | strcuture['messages'] = messages 175 | title = complete(messages) 176 | # if title contains a quotation, extract it 177 | if title.find('"') != -1: 178 | # Extract a sentence quoted by " 179 | import re 180 | title = re.findall(r'\"(.+?)\"', title)[0] 181 | strcuture['title'] = title 182 | else: 183 | strcuture['title'] = title.split('\n')[0] 184 | 185 | # return {'structure': json.dumps(strcuture, indent=4)} 186 | 187 | print("Found title: {}".format(strcuture['title'])) 188 | 189 | if section_tag is not None: 190 | current_section = -1 191 | for item in content: 192 | for section in strcuture['sections']: 193 | if item['type'] == section['type']: 194 | if item['text'] == section['text']: 195 | current_section += 1 196 | strcuture['sections'][current_section]['content'] = [] 197 | break 198 | 199 | if current_section == -1: 200 | strcuture['sections'].insert(0, { 201 | 'type': 'preface', 202 | 'text': '', 203 | 'content': [] 204 | }) 205 | current_section = 0 206 | 207 | strcuture['sections'][current_section]['content'].append(item.copy()) 208 | 209 | print("Found {} sections".format(len(strcuture['sections']))) 210 | 211 | # Firstly, try to summarize all the sections at once 212 | messages = [ 213 | {'role': 'system', 'content': 'You are a helpful assistant'} 214 | ] 215 | 216 | if language == 'ko': 217 | message = '글의 구조는 title > h1 > h2 > h3 > p 의 순서이다\n\n' 218 | else: 219 | message = "Text strcuture is title > h1 > h2 > h3 > h4 > p.\n\n" 
220 | message += "title: {}\n".format(strcuture['title']) 221 | chunk = '' 222 | for section in strcuture['sections']: 223 | if section['type'] == 'preface': 224 | for item in section['content']: 225 | if item['type'] == 'p': 226 | chunk += "{}: {}\n".format(item['type'], item['text']) 227 | else: 228 | chunk += "{}: {}\n".format(section['type'], section['text']) 229 | for item in section['content']: 230 | chunk += "{}: {}\n".format(item['type'], item['text']) 231 | # Count the number of sentences of message 232 | length = count_sentences(chunk) 233 | 234 | if language == 'ko': 235 | length = int(length / SUM_RATIO_KOR / SUM_RATIO_SUMMARY) + 1 236 | command = "\nPlease summarize the text as a {} sentence long paragraph in Korean:\n".format(length) 237 | else: 238 | length = int(length / SUM_RATIO_ENG / SUM_RATIO_SUMMARY) + 1 239 | command = "\nPlease summarize the text as a {} sentence long paragraph:\n".format(length) 240 | 241 | print("Trying to summarize all sections as {} sentences".format(length)) 242 | 243 | messages.append({'role': 'user', 'content': "{}{}{}".format(message, chunk, command)}) 244 | strcuture['messages'] = messages 245 | 246 | # Write structure as json text into a file 247 | with open('structure.json', 'w') as f: 248 | f.write(json.dumps(strcuture, indent=4)) 249 | 250 | print("Before: {}\n".format(messages[1]['content'])) 251 | summary = complete(messages) 252 | if summary == 'Error': 253 | # Secondly, try to summarize each section separately 254 | for section in strcuture['sections']: 255 | messages = [ 256 | {'role': 'system', 'content': 'You are a helpful assistant'} 257 | ] 258 | if language == 'ko': 259 | message = '글의 구조는 title > h1 > h2 > h3 > p 의 순서이다.\n\n' 260 | else: 261 | message = "Text strcuture is title > h1 > h2 > h3 > h4 > p.\n\n" 262 | # message += "please summarize the content as maximum {} sentences paragraph from:\n".format(MAX_LINES) 263 | message += "title: {}\n".format(strcuture['title']) 264 | 265 | chunk = '' 266 | 
if section['type'] == 'preface': 267 | chunk += "{}: {}\n".format(section_tag, section['text']) 268 | for item in section['content']: 269 | if item['type'] == 'p': 270 | chunk += "{}: {}\n".format(item['type'], item['text']) 271 | else: 272 | chunk += "{}: {}\n".format(section['type'], section['text']) 273 | for item in section['content']: 274 | chunk += "{}: {}\n".format(item['type'], item['text']) 275 | 276 | # Count the number of sentences of message 277 | length = count_sentences(chunk) 278 | 279 | if language == 'ko': 280 | length = int(length / SUM_RATIO_KOR) + 1 281 | command = "\nPlease summarize the content as maximum {} bullets in Korean:\n\n".format(length) 282 | else: 283 | length = int(length / SUM_RATIO_ENG) + 1 284 | command = "\nPlease summarize the content as maximum {} bullets:\n\n".format(length) 285 | 286 | print("Trying to summarize section as {} sentences".format(length)) 287 | print('section: {}'.format(section['text'])) 288 | 289 | messages.append({'role': 'user', 'content': "{}{}{}".format(message, chunk, command)}) 290 | section['messages'] = messages 291 | 292 | summary = complete(messages) 293 | if summary == 'Error': 294 | # Thirdly, try to summarize each chunk of section separately 295 | chunk_array = chunk.split('\n') 296 | chunk_index = 0 297 | chunk_size = 0 298 | chunk_segment = '' 299 | previous_summary = '' 300 | for chunk_line in chunk_array: 301 | chunk_size += len(chunk_line.split(' ')) 302 | if chunk_size > MAX_CHUNK_SIZE: 303 | chunk_size = 0 304 | chunk_index += 1 305 | 306 | messages = [ 307 | {'role': 'system', 'content': 'You are a helpful assistant'}, 308 | {'role': 'user', 'content': "Previous summary is:\n{}\n".format(previous_summary)} 309 | ] 310 | 311 | # Count the number of sentences of message 312 | length = count_sentences("{}\n{}".format(previous_summary, chunk_segment)) 313 | 314 | if language == 'ko': 315 | length = int(length / SUM_RATIO_KOR) + 1 316 | command = "\nPlease summarize the content with previous 
summary as maximum {} bullets in Korean:\n\n".format(length) 317 | else: 318 | length = int(length / SUM_RATIO_ENG) + 1 319 | command = "\nPlease summarize the content with previous summary as maximum {} bullets:\n\n".format(length) 320 | 321 | print("Trying to summarize each chunk of section as {} sentences".format(length)) 322 | print('section: {}'.format(section['text'])) 323 | 324 | messages.append({'role': 'user', 'content': "{}{}{}".format(message, chunk_segment, command)}) 325 | summary = complete(messages) 326 | print("previous summary: {}".format(previous_summary)) 327 | print("chunk summary: {}".format(summary)) 328 | previous_summary = summary 329 | chunk_segment = '' 330 | 331 | chunk_segment += chunk_line + '\n' 332 | 333 | if chunk_segment != '': 334 | messages = [ 335 | {'role': 'system', 'content': 'You are a helpful assistant'}, 336 | {'role': 'user', 'content': "Previous summary is:\n{}\n".format(previous_summary)} 337 | ] 338 | 339 | if language == 'ko': 340 | command = "\nPlease summarize the content with previous summary as maximum {} bullets in Korean:\n\n".format(length) 341 | else: 342 | command = "\nPlease summarize the content with previous summary as maximum {} bullets:\n\n".format(length) 343 | 344 | print("Trying to summarize each chunk of section as {} sentences".format(length)) 345 | print('section: {}'.format(section['text'])) 346 | 347 | messages.append({'role': 'user', 'content': "{}{}{}".format(message, chunk_segment, command)}) 348 | summary = complete(messages) 349 | print("previous summary: {}".format(previous_summary)) 350 | print("chunk summary: {}".format(summary)) 351 | 352 | section['summary'] = summary 353 | 354 | print('section: {}'.format(section['text'])) 355 | print("summary: {}".format(summary)) 356 | 357 | # Finally summarize all the sections 358 | messages = [ 359 | {'role': 'system', 'content': 'You are a helpful assistant'} 360 | ] 361 | if language == 'ko': 362 | message = 'These are title and sections.\n\n' 363 
| else: 364 | message = "These are title and sections.\n\n" 365 | message += "Title: {}\n".format(strcuture['title']) 366 | chunk = '' 367 | for section in strcuture['sections']: 368 | if section['type'] == 'preface': 369 | chunk += "Section: {}\n".format(section['text']) 370 | chunk += "{}\n".format(section['summary']) 371 | else: 372 | chunk += "Section: {}\n".format(section['text']) 373 | chunk += "{}\n".format(section['summary']) 374 | # Count the number of sentences of message 375 | length = count_sentences(chunk) 376 | 377 | if language == 'ko': 378 | length = int(length / SUM_RATIO_SUMMARY) + 1 # Korean summarization ratio is too high, so apply English ratio 379 | command = "\nPlease summarize all sections as a {} sentence long paragraph in Korean:\n".format(length) 380 | else: 381 | length = int(length / SUM_RATIO_SUMMARY) + 1 382 | command = "\nPlease summarize all sections as a {} sentence long paragraph:\n".format(length) 383 | 384 | print("Trying to summarize section as {} sentences".format(length)) 385 | 386 | messages.append({'role': 'user', 'content': "{}{}{}".format(message, chunk, command)}) 387 | strcuture['messages'] = messages 388 | 389 | print("Before: {}\n".format(messages[1]['content'])) 390 | summary = complete(messages) 391 | 392 | print("Final summary: {}".format(summary)) 393 | strcuture['summary'] = summary 394 | 395 | else: 396 | messages = [ 397 | {'role': 'system', 'content': 'You are a helpful assistant'} 398 | ] 399 | if language == 'ko': 400 | message = '글의 구조는 title > h1 > h2 > h3 > p 의 순서이다.\n\n' 401 | else: 402 | message = "Text strcuture is title > h1 > h2 > h3 > h4 > p.\n\n" 403 | # message += "please summarize the content as maximum {} sentences paragraph from:\n".format(MAX_LINES) 404 | message += "title: {}\n".format(strcuture['title']) 405 | 406 | chunk = '' 407 | for item in content: 408 | chunk += "{}: {}\n".format(item['type'], item['text']) 409 | 410 | # Count the number of sentences of message 411 | length = 
count_sentences(chunk) 412 | 413 | if language == 'ko': 414 | length = int(length / SUM_RATIO_KOR / SUM_RATIO_SUMMARY) + 1 415 | command = "\nPlease summarize the text as a {} sentence long paragraph in Korean:\n".format(length) 416 | else: 417 | length = int(length / SUM_RATIO_ENG / SUM_RATIO_SUMMARY) + 1 418 | command = "\nPlease summarize the text as a {} sentence long paragraph:\n".format(length) 419 | 420 | print("Trying to summarize all as {} sentences without section".format(length)) 421 | 422 | messages.append({'role': 'user', 'content': "{}{}{}".format(message, chunk, command)}) 423 | strcuture['messages'] = messages 424 | 425 | # Write structure as json text into a file 426 | with open('structure.json', 'w') as f: 427 | f.write(json.dumps(strcuture, indent=4)) 428 | 429 | print("Before: {}\n".format(messages[1]['content'])) 430 | summary = complete(messages) 431 | print("Final summary: {}".format(summary)) 432 | strcuture['summary'] = summary 433 | 434 | return { 435 | 'title': strcuture['title'], 436 | 'summary': strcuture['summary'] 437 | } 438 | 439 | return {'structure': json.dumps(strcuture, indent=4)} 440 | 441 | 442 | @app.route('/') 443 | def index(): 444 | if 'url' not in session: 445 | session['url'] = 'https://edition.cnn.com/2023/04/15/americas/darien-gap-migrants-colombia-panama-whole-story-cmd-intl/index.html' 446 | return render_template('index.html', url=session.get('url')) 447 | 448 | @app.route('/summarize', methods=['POST']) 449 | def summarize(): 450 | url = request.form['url'] 451 | session['url'] = url 452 | print(url) 453 | result = read_webpage(url) 454 | if result['result'] == 'success': 455 | summary = summarize_content(result['title'], result['content'], result['language']) 456 | if 'title' in summary: 457 | return render_template('index.html', url=session.get('url'), title=summary['title'], content=summary['summary']) 458 | else: 459 | return render_template('index.html', url=session.get('url'), structure=summary['structure']) 
460 | # return render_template('index.html', title=result['title'], content=json.dumps(result['content'], indent=4)) 461 | else: 462 | return render_template('index.html', url=session.get('url'), title=result['status_code'], content=result['err_msg']) 463 | 464 | if len(sys.argv) > 1: 465 | # get the url from the command line 466 | url = sys.argv[1] 467 | print(url) 468 | 469 | # read the webpage 470 | result = read_webpage(url) 471 | if result['result'] == 'success': 472 | print(result['title']) 473 | print(json.dumps(result['content'], indent=4)) 474 | else: 475 | print(result['status_code']) 476 | print(result['content']) 477 | 478 | sys.exit(0) 479 | 480 | if __name__ == '__main__': 481 | app.run(host='0.0.0.0', port=5000, debug=True) 482 | -------------------------------------------------------------------------------- /src/webhook.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import re 3 | import subprocess 4 | import traceback 5 | import requests 6 | import json 7 | from flask import Flask, request 8 | import os 9 | import openai 10 | from google.cloud import translate_v2 as translate 11 | import logging 12 | import sys 13 | from bs4 import BeautifulSoup 14 | from langdetect import detect 15 | 16 | # Telegarm python packages 17 | # pip install python-telegram-bot 18 | # import telegram 19 | # bot = telegram.Bot(token='YOUR_TOKEN') 20 | 21 | logging.basicConfig(level=logging.INFO, stream=sys.stdout) 22 | 23 | app = Flask(__name__) 24 | 25 | # Replace YOUR_BOT_TOKEN with your actual bot token 26 | # ChatGPTbyJerryBot 27 | bot_token = "YOUR_BOT_TOKEN" ############## REDACTED 28 | 29 | # OpenAI API Key 30 | openai_apikey = "YOUR_OPENAI_API_KEY" ############## REDACTED 31 | openai.api_key = openai_apikey 32 | 33 | # Firebase Realtime Database initialization 34 | firebase_db = None 35 | try: 36 | import firebase_admin 37 | from firebase_admin import credentials, db 38 | if "FIREBASE_SA_KEY" in 
os.environ: 39 | cred = credentials.Certificate(os.environ['FIREBASE_SA_KEY']) 40 | else: 41 | cred = credentials.Certificate('YOUR_FIREBASE_SA_KEY_PATH_IF_YOU_WANT') ############## REDACTED 42 | firebase_admin.initialize_app(cred, { 43 | 'databaseURL: YOUR_FIREBASE_DATABASE_URL' ############## REDACTED 44 | }) 45 | firebase_db = db.reference('/') 46 | logging.info("Firebase Realtime Database initialized") 47 | except Exception as e: 48 | logging.info("Firebase Realtime Database initialization failed: {}".format(e)) 49 | 50 | # Params for openai api 51 | params = {} 52 | saved_history = { 53 | } 54 | 55 | # allowed chatid 56 | allowed_chatid = [ALLOWED_CHATID_1, ALLOWED_CHATID_2, ... as number] ############## REDACTED 57 | 58 | def get_system_info(): 59 | # get last updated time of 'webhook.py' file in KST 60 | # Get the timestamp of the file in UTC 61 | timestamp = datetime.datetime.fromtimestamp(os.path.getmtime('webhook.py')) 62 | # Convert the UTC timestamp to KST timezone 63 | kst_timestamp = timestamp.astimezone(datetime.timezone(datetime.timedelta(hours=9))) 64 | last_updated = kst_timestamp.strftime('%Y-%m-%d %H:%M:%S') 65 | message = "Last updated: {}".format(last_updated) 66 | # get file creation time of '/proc/1' 67 | # Get the timestamp of the file in UTC 68 | timestamp = datetime.datetime.fromtimestamp(os.path.getctime('/proc/1')) 69 | # Convert the UTC timestamp to KST timezone 70 | kst_timestamp = timestamp.astimezone(datetime.timezone(datetime.timedelta(hours=9))) 71 | started = kst_timestamp.strftime('%Y-%m-%d %H:%M:%S') 72 | message += ", Boot time: {}".format(started) 73 | return message 74 | def params_get(chatid): 75 | global firebase_db, params 76 | chatid = str(chatid) 77 | if firebase_db is not None: 78 | try: 79 | # create if 'params' exists 80 | if firebase_db.child('params').get() is None: 81 | firebase_db.child('params').set({}) 82 | return firebase_db.child('params').child(chatid).get() 83 | except Exception as e: 84 | 
full_stack_error_msg = traceback.format_exc() 85 | logging.error(full_stack_error_msg) 86 | else: 87 | if chatid not in params: 88 | params[chatid] = {} 89 | return params[chatid] 90 | 91 | def params_set(chatid, value): 92 | global firebase_db, params 93 | chatid = str(chatid) 94 | if firebase_db is not None: 95 | try: 96 | # create if 'params' exists 97 | if firebase_db.child('params').get() is None: 98 | firebase_db.child('params').set({}) 99 | firebase_db.child('params').child(chatid).set(value) 100 | except Exception as e: 101 | full_stack_error_msg = traceback.format_exc() 102 | logging.error(full_stack_error_msg) 103 | else: 104 | if chatid not in params: 105 | params[chatid] = None 106 | params[chatid] = value 107 | 108 | def save_history(chatid, title): 109 | global firebase_db, saved_history 110 | chatid = str(chatid) 111 | # timestamp as GMT+9 112 | timestamp = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9))).strftime('%Y-%m-%d %H:%M:%S') 113 | if firebase_db is not None: 114 | try: 115 | firebase_db.child('saved_history').child(chatid).child(timestamp).set({ 116 | "title": title, 117 | "params": params_get(chatid) 118 | }) 119 | except Exception as e: 120 | full_stack_error_msg = traceback.format_exc() 121 | logging.error(full_stack_error_msg) 122 | else: 123 | if chatid not in saved_history: 124 | saved_history[chatid] = {} 125 | saved_history[chatid][timestamp] = { 126 | "title": title, 127 | "params": params_get(chatid) 128 | } 129 | 130 | def get_saved_history(chatid): 131 | global firebase_db, saved_history 132 | chatid = str(chatid) 133 | if firebase_db is not None: 134 | try: 135 | return firebase_db.child('saved_history').child(chatid).get() 136 | except Exception as e: 137 | full_stack_error_msg = traceback.format_exc() 138 | logging.error(full_stack_error_msg) 139 | else: 140 | if chatid not in saved_history: 141 | return None 142 | return saved_history[chatid] 143 | 144 | def load_history(chatid, key): 145 | global 
@app.route('/webhook', methods=['POST'])
def webhook():
    """Telegram webhook endpoint: parse the update, run bot commands or relay
    the message to the OpenAI chat-completions API, and reply via Telegram.

    Always returns 'OK' so Telegram does not retry the update.
    """
    global openai_apikey, allowed_chatid, bot_token

    # Extract the message from the incoming request
    update = request.json
    headers = request.headers
    # logging.info(headers)
    logging.info(update)
    # logging.info(request.data)

    # Incoming update shapes (see Telegram Bot API): a private chat carries
    # 'message' (or 'edited_message' when the user edits), a channel carries
    # 'channel_post'.  Chat id is at <kind>['chat']['id'], text at
    # <kind>['text'].

    try:
        debug = "on"
        stt_text = None
        try:
            update_id = update['update_id']
            if 'channel_post' in update:
                chatid = update['channel_post']['chat']['id']
                message = update['channel_post']
            elif 'edited_message' in update:
                chatid = update['edited_message']['chat']['id']
                message = update['edited_message']
            else:
                chatid = update['message']['chat']['id']
                message = update['message']
            message = message['text']

            logging.info("message length is {}".format(len(message)))
        except Exception as e:
            # Non-text updates (stickers, photos, ...) land here; ack and drop.
            full_stack_error_msg = traceback.format_exc()
            logging.error("Error: {}".format(full_stack_error_msg))
            logging.error(json.dumps(update, indent=4))

            return "OK"

        # frequency_penalty / presence_penalty defaults are 0.5: a moderate
        # repetition penalty (0.0 = none, 1.0 = strongest) per the OpenAI API.

        message_trimed = None

        # Allow only specific chat ids to prevent abuse of the OpenAI budget.
        if chatid in allowed_chatid:
            logging.info("chatid: {}, allowed_chatid: {}".format(chatid, allowed_chatid))
            # Negative ids are group/channel chats: only react to messages
            # explicitly prefixed with [gpt].
            if chatid < 0:
                if message.startswith("[gpt]"):
                    message = message[5:].strip()
                    logging.info("channel chat into GPT: {}".format(message))
                else:
                    return "OK"
            # NOTE: this local `params` deliberately shadows the module-level
            # fallback dict of the same name.
            params = params_get(chatid)
            if params is None or params == {}:
                params = {
                    'model': 'gpt-3.5-turbo',
                    # REDACTED in dump ("1024 for gpt-3.5-turbo / 2048 for
                    # gpt-4") — restored to a valid literal; /gpt3 and /gpt4
                    # commands below switch it per model.
                    'max_tokens': 1024,
                    'frequency_penalty': 0.5,
                    'presence_penalty': 0.5,
                    'temperature': 0.5,
                    'translate_target': "None",
                    'debug': 'off',
                    'timeout': 120,
                    'trim': 'on',
                    'messages': [
                        {'role': 'system', 'content': 'You are a helpful assistant'}
                    ]
                }

            # logging.info("params: {}".format(params))

            model = params['model']
            frequency_penalty = params['frequency_penalty']
            presence_penalty = params['presence_penalty']
            max_tokens = params['max_tokens']
            temperature = params['temperature']
            translate_target = params['translate_target']
            messages = params['messages'].copy()
            debug = params['debug']
            timeout = params['timeout']
            trim = params['trim']

            if debug == "on":
                logging.info(json.dumps(update, indent=4))

            # Prevent same update_id retry (mostly caused by OpenAI API
            # timeouts making Telegram re-deliver) - works
            if 'last_update_id' in params and update_id == params['last_update_id']:
                logging.info("Same update_id: {}. Ignore".format(update_id))
                send_message(chatid, "Same update_id. Ignore")
                return "OK"

            params['last_update_id'] = update_id
            params_set(chatid, params)

            # Telegram sends a long message as multiple snippets of around
            # 4096 bytes.  Buffer snippets in params['last_message'] and
            # combine them into one message.
            if len(message) >= 4090:
                if len(message) < 4096:
                    message = message + " "
                if 'last_message' in params and len(params['last_message']) > 0:
                    message = params['last_message'] + message
                params['last_message'] = message
                params_set(chatid, params)
                send_message(chatid, "*** Message too long. Wait for next snippet ***")
                return "OK"
            elif 'last_message' in params and len(params['last_message']) > 0:
                message = params['last_message'] + message
                params['last_message'] = ""
                params_set(chatid, params)

            if stt_text is not None:
                send_message(chatid, "STT: {}".format(stt_text))

            if translate_target != "None":
                tclient = translate.Client()

            update_msg = True
            saveHistory = False

            # Bot's / command parsing and handling ("%%" is an escape for "/")
            if message.startswith("%%"):
                message = message.replace("%%", "/")
            if message.startswith("/") or message == "--" or message == "++" or message == "-ko" or message == "--ko" or message.startswith("http://") or message.startswith("https://"):
                if message == "/clear" or message == "/c" or message == "--":
                    messages = [
                        {'role': 'system', 'content': 'You are a helpful assistant'}
                    ]
                    params['messages'] = messages
                    params_set(chatid, params)
                    send_message(chatid, "Clear messages")
                    return 'OK'
                elif message == "/topic" or message == "/topics" or message == "/t":
                    # Falls through to the GPT call; history is not updated.
                    message = "list up topics we have discussed"
                    update_msg = False
                elif message == "/save" or message == "/s":
                    # Ask the model for a title, then snapshot the params.
                    message = "what is a title based on our conversation?"
                    update_msg = False
                    saveHistory = True
                elif message == "/list" or message == "/l":
                    history = get_saved_history(chatid)
                    if history is None or history == {}:
                        message = "No saved history"
                    else:
                        message = "Saved history:\n"
                        idx = 1
                        for key in history:
                            message += "{}: {} at {}\n".format(idx, history[key]['title'], key)
                            idx += 1
                    send_message(chatid, message)
                    return 'OK'
                elif message.startswith("/load"):
                    history = get_saved_history(chatid)
                    if history is None or history == {}:
                        message = "No saved history"
                    else:
                        if message == "/load":
                            message = "which history do you want to load? (1, 2, 3, ...)"
                        else:
                            try:
                                # 1-based index into the saved-history listing.
                                idx = int(message[6:].strip())
                                # (was a stray print(); logging for consistency)
                                logging.info("idx: {}, len: {}".format(idx, len(history)))
                                if idx > 0 and idx <= len(history):
                                    key = list(history.keys())[idx-1]
                                    load_history(chatid, key)
                                    message = "Loaded history: {}".format(history[key]['title'])
                                else:
                                    message = "Invalid index"
                            except Exception as e:
                                logging.exception(e)
                                message = "Invalid index"
                    send_message(chatid, message)
                    return 'OK'
                elif message.startswith("/params"):
                    # "/params <name> <value>"; message[8:] skips "/params ".
                    if message == "/params":
                        message = "frequency_penalty: {}\npresence_penalty: {}\nmax_tokens: {}\ntemperature: {}\nmodel: {}".format(frequency_penalty, presence_penalty, max_tokens, temperature, model)
                        send_message(chatid, message)
                        return 'OK'
                    elif message[8:].startswith("frequency_penalty"):
                        frequency_penalty = float(message[8:].split(" ")[1])
                        params['frequency_penalty'] = frequency_penalty
                        params_set(chatid, params)
                        message = "frequency_penalty: {}".format(frequency_penalty)
                        send_message(chatid, message)
                        return 'OK'
                    elif message[8:].startswith("presence_penalty"):
                        presence_penalty = float(message[8:].split(" ")[1])
                        params['presence_penalty'] = presence_penalty
                        params_set(chatid, params)
                        message = "presence_penalty: {}".format(presence_penalty)
                        send_message(chatid, message)
                        return 'OK'
                    elif message[8:].startswith("max_tokens"):
                        max_tokens = int(message[8:].split(" ")[1])
                        params['max_tokens'] = max_tokens
                        params_set(chatid, params)
                        message = "max_tokens: {}".format(max_tokens)
                        send_message(chatid, message)
                        return 'OK'
                    elif message[8:].startswith("temperature"):
                        temperature = float(message[8:].split(" ")[1])
                        params['temperature'] = temperature
                        params_set(chatid, params)
                        message = "temperature: {}".format(temperature)
                        send_message(chatid, message)
                        return 'OK'
                    elif message[8:].startswith("model"):
                        model = message[8:].split(" ")[1]
                        params['model'] = model
                        params_set(chatid, params)
                        message = "Model is {}".format(model)
                        send_message(chatid, message)
                        return 'OK'
                    elif message[8:] == "reset":
                        params_set(chatid, None)
                        message = "Reset parameters. Check current params with /params command"
                        send_message(chatid, message)
                        return 'OK'
                    else:
                        send_message(chatid, "Unknown params")
                        send_message(chatid, "Available params are: frequency_penalty, presence_penalty, max_tokens, temperature, model, reset")
                        return 'OK'
                elif message.startswith("/system"):
                    # Show / reset / replace the system prompt (messages[0]).
                    if message == "/system":
                        message = "System message is {}".format(messages[0])
                        send_message(chatid, message)
                        return 'OK'
                    elif message[8:] == "reset":
                        messages[0]['content'] = "You are a helpful assistant"
                        params['messages'] = messages
                        params_set(chatid, params)
                        send_message(chatid, "Reset system message as {}".format(messages[0]))
                        return 'OK'
                    else:
                        messages[0]['content'] = message[8:]
                        params['messages'] = messages
                        params_set(chatid, params)
                        send_message(chatid, "Set system message as {}".format(messages[0]))
                        return 'OK'
                elif message.startswith("/translate"):
                    if message == "/translate":
                        message = "Translate target language is {}".format(translate_target if translate_target else "None")
                        send_message(chatid, message)
                        return 'OK'
                    else:
                        language = message[10:].strip()
                        if language.lower() == "none":
                            translate_target = "None"
                        else:
                            translate_target = language

                        params['translate_target'] = translate_target
                        params_set(chatid, params)
                        message = "Translate target language is {}".format(translate_target)

                        send_message(chatid, message)
                        return 'OK'
                elif message == "/history" or message == "/h" or message == "++":
                    message = "History messages are: \n"
                    for msg in messages:
                        message += "{}: {}\n".format(msg['role'], msg['content'])
                    # If the full dump is too long for Telegram, retry with
                    # each entry truncated to 200 characters.
                    if not send_message(chatid, message):
                        message = "Truncated history messages are: \n"
                        for msg in messages:
                            message += "{}: {}\n".format(msg['role'], msg['content'] if len(msg['content']) < 200 else msg['content'][:200] + "...")
                        send_message(chatid, message)

                    return 'OK'
                elif message.startswith("/model"):
                    if message == "/model":
                        message = "Model is {}".format(model)
                        send_message(chatid, message)
                        return 'OK'
                    else:
                        model = message[7:].strip()
                        params['model'] = model
                        params_set(chatid, params)
                        message = "Model is {}".format(model)
                        send_message(chatid, message)
                        return 'OK'
                elif message.startswith("/debug"):
                    if message == "/debug":
                        message = "Debug mode is {}".format(debug)
                        send_message(chatid, message)
                        return 'OK'
                    else:
                        debug = message[7:].strip().lower()
                        if debug not in ["on", "off"]:
                            send_message(chatid, "Debug mode should be on or off")
                            return 'OK'
                        params['debug'] = debug
                        params_set(chatid, params)
                        message = "Debug mode is {}".format(debug)
                        send_message(chatid, message)
                        return 'OK'
                elif message.startswith("/timeout"):
                    if message == "/timeout":
                        message = "Timeout is {}".format(timeout)
                        send_message(chatid, message)
                        return 'OK'
                    else:
                        timeout = int(message[9:].strip())
                        params['timeout'] = timeout
                        params_set(chatid, params)
                        message = "Timeout is {}".format(timeout)
                        send_message(chatid, message)
                        return 'OK'
                elif message.startswith("/trim"):
                    if message == "/trim":
                        message = "Trim is {}".format(trim)
                        send_message(chatid, message)
                        return 'OK'
                    else:
                        trim = message[6:].strip()
                        if trim not in ["on", "off"]:
                            send_message(chatid, "Trim should be on or off")
                            return 'OK'
                        params['trim'] = trim
                        params_set(chatid, params)
                        message = "Trim is {}".format(trim)
                        send_message(chatid, message)
                        return 'OK'
                elif (message.startswith("/sum") and message.split(" ")[0] in ["/sum", "/summarize", "/summary"] and len(message.split(" ")) > 1) or (message.startswith("http://") or message.startswith("https://")):
                    # "/sum <url> [n]" or a bare URL: fetch the page and turn
                    # it into a summarization prompt for the GPT call below.
                    if message.startswith("/sum"):
                        url = message.split(" ")[1]
                        if len(message.split(" ")) > 2:
                            length = int(message.split(" ")[2])
                        else:
                            length = 10
                    else:
                        url = message
                        length = 10

                    headers = {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
                    }

                    # NOTE(review): no timeout here — a slow site can stall
                    # the handler until the platform kills the request.
                    response = requests.get(url, headers=headers)

                    if response.status_code == 200:
                        html = response.text
                        soup = BeautifulSoup(html, 'html.parser')

                        # get the title (NOTE(review): assumes a <title> tag
                        # exists; a page without one raises AttributeError,
                        # caught by the outer handler)
                        title = soup.select_one('title').get_text().strip()

                        # get the body and collect its <p> descendants
                        body = soup.select_one('body')
                        children = body.findChildren()
                        content = ''
                        for child in children:
                            if child.name == 'p':
                                content += child.get_text().strip() + '\n'

                        # Pick the prompt language from the page content.
                        language = detect(content)
                        if language == 'en':
                            message = "summarize this as {} bullet points:\n{}\n{}".format(length, title, content)
                        elif language == 'ko':
                            message = "요약을 {} 개의 문장으로 해주세요:\n{}\n{}".format(length, title, content)
                        else:
                            message = "summarize this as {} bullet points and answer in its language:\n{}\n{}".format(length, title, content)
                    else:
                        send_message(chatid, "Failed to get the content of the url. Message is {} {}".format(response.status_code, response.text))
                        return 'OK'
                elif message == "/ko" or message == "-ko" or message == "--ko":
                    # "Please translate the above response into Korean"
                    message = "위의 응답을 한국어로 번역해주세요"
                elif message == "/info" or message == "/version":
                    send_message(chatid, get_system_info())
                    return 'OK'
                elif message == "/gpt3":
                    model = "gpt-3.5-turbo"
                    max_tokens = 1024
                    params['model'] = model
                    params['max_tokens'] = max_tokens
                    params_set(chatid, params)
                    message = "Model is {} and max_tokens is {}".format(model, max_tokens)
                    send_message(chatid, message)
                    return 'OK'
                elif message == "/gpt4":
                    model = "gpt-4"
                    max_tokens = 2048
                    params['model'] = model
                    params['max_tokens'] = max_tokens
                    params_set(chatid, params)
                    message = "Model is {} and max_tokens is {}".format(model, max_tokens)
                    send_message(chatid, message)
                    return 'OK'
                elif message == "/reset":
                    params_set(chatid, {})
                    message = "Reset parameters. Check current params with /params and /model command"
                    send_message(chatid, message)
                    return 'OK'
                else:
                    help_message = "Available commands are: /help, /clear, /topic, /params, /system, /translate, /history, /model, ... \nhttps://javalove93.github.io/telegram-gptbot/index.html"
                    if message == "/help":
                        send_message(chatid, help_message)
                        return 'OK'

                    send_message(chatid, "Unknown command")
                    send_message(chatid, help_message)
                    return 'OK'

            # Process the message and send a response
            if translate_target != "None":
                # Translate the user's text to English before the GPT call.
                message = tclient.translate(message, target_language='en')['translatedText']
            messages.append({'role': 'user', 'content': message})
            if debug == "on":
                logging.info(json.dumps(messages, indent=4))

            original_msg_len = len(message.splitlines())

            # Retry loop: on a context-length error the history/message is
            # trimmed and the call retried; any other error aborts.
            while True:
                try:
                    api_or_rest = "rest"
                    if api_or_rest == "api":
                        response = openai.ChatCompletion.create(
                            model=model,
                            messages=messages,
                            temperature=temperature,
                            max_tokens=max_tokens,
                            top_p=1,
                            frequency_penalty=frequency_penalty,
                            presence_penalty=presence_penalty,
                            stop=None,
                            timeout=timeout # seconds - doesn't work
                        )
                    else:
                        headers = {
                            "Content-Type": "application/json",
                            "Authorization": "Bearer " + openai_apikey
                        }
                        body = {
                            "model": model,
                            "messages": messages,
                            "temperature": temperature,
                            "max_tokens": max_tokens,
                            "top_p": 1,
                            "frequency_penalty": frequency_penalty,
                            "presence_penalty": presence_penalty,
                            "stop": None
                        }
                        if debug == "on":
                            logging.info("------ REST API Request Body ------")
                            logging.info(json.dumps(body, indent=4))
                        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=body, timeout=timeout)
                        if response.status_code != 200:
                            # throw exception
                            raise Exception("OpenAI API error: {}, {}".format(response.status_code, response.text))

                    # NOTE(review): .json() is only valid on the REST path;
                    # the "api" path above is dead code (api_or_rest is
                    # hard-coded to "rest").
                    response = response.json()
                    break
                except Exception as e:
                    logging.exception(e)

                    full_stack_error_msg = traceback.format_exc()
                    # e.g. "This model's maximum context length is 4097 tokens.
                    # However, you requested 5913 tokens ..."
                    too_many_tokens_error = "This model's maximum context length is"
                    if str(e).find(too_many_tokens_error) != -1:
                        # Context overflow: drop the oldest non-system entry
                        # and retry.
                        messages.pop(1)
                        message_trimed = "*** Old history is trimmed. ***"
                        if len(messages) < 2:
                            # Only the system prompt is left, i.e. the user
                            # message itself was dropped: cut its last line
                            # and re-append.
                            message = message[:message.rfind('\n')]
                            messages.append({'role': 'user', 'content': message})
                            message_line_size = len(message.splitlines())
                            message_trimed = "*** Later part of the message is trimmed: {} into {}. ***".format(original_msg_len, message_line_size)

                        continue

                    send_message(chatid, "OpenAI API error: {}".format(e))
                    return 'OK'

            if debug == "on":
                logging.info(json.dumps(response, indent=4))

            message = response['choices'][0]['message']['content']
            if translate_target != "None":
                # Translate the model's reply back to the target language.
                message = tclient.translate(message, target_language=translate_target)['translatedText']
            messages.append({'role': 'assistant', 'content': message})
            if update_msg:
                params['messages'] = messages
                params_set(chatid, params)
            if saveHistory:
                # The model is asked to put the title in double quotes; fall
                # back to the raw reply when it does not (the previous bare
                # [0] raised IndexError and aborted the handler).
                quoted = re.findall(r'"([^"]*)"', message)
                title = quoted[0] if quoted else message.strip()
                save_history(chatid, title)
                message = "History saved as title, \"{}\"".format(title)
            if debug == "on":
                response_text = get_system_info() + "\n" + message
            else:
                response_text = message
        else:
            response_text = "GPT: You're not welcomed to use this bot. {}".format(chatid)

        if message_trimed is not None:
            response_text = message_trimed + "\n" + response_text
        send_message(chatid, response_text)
    except Exception as e:
        logging.exception(e)

        full_stack_error_msg = traceback.format_exc()
        logging.error(full_stack_error_msg)
        if debug == "on":
            send_message(chatid, full_stack_error_msg)
            logging.error(json.dumps(update, indent=4))
        else:
            send_message(chatid, "Error: {}".format(str(e)))

    return 'OK'

def send_message(chatid, text):
    """Send `text` to `chatid` via the Telegram Bot API.

    Returns True on success, False on any failure (error is logged).
    """
    try:
        url = "https://api.telegram.org/bot{}/sendMessage".format(bot_token)
        data = {
            "chat_id": chatid,
            "text": text
        }
        logging.info("Sending message to {}: {}".format(chatid, text))
        # NOTE(review): no timeout — relies on the platform request deadline.
        response = requests.post(url, json=data)
        response.raise_for_status()
        return True
    except Exception as e:
        logging.exception(e)
        full_stack_error_msg = traceback.format_exc()
        logging.error(full_stack_error_msg)

        logging.info("data: {}".format(json.dumps(data, indent=4)))
        return False

@app.route('/channels', methods=['GET'])
def channels():
    """Drain pending updates for a channel via getUpdates.

    Temporarily deletes the webhook (getUpdates and webhooks are mutually
    exclusive in the Bot API), polls until no updates remain, then restores
    the webhook.  FIX: the polling loop previously never terminated, so the
    webhook restoration was unreachable and the view returned no response.
    """
    channel_id = request.args.get('chat_id')

    def get_channel_updates(offset=None):
        # Get updates from the Telegram Bot API
        url = "https://api.telegram.org/bot{}/getUpdates".format(bot_token)
        params = {'chat_id': channel_id, 'offset': offset}
        response = requests.get(url, params=params)
        if response.status_code != 200:
            raise Exception(f'Error retrieving updates: {response.status_code} {response.text}')
        return response.json().get('result', [])

    # delete webhook (required before getUpdates may be used)
    url = "https://api.telegram.org/bot{}/deleteWebhook".format(bot_token)
    response = requests.post(url)
    response.raise_for_status()

    latest_message_id = None

    while True:
        updates = get_channel_updates(offset=latest_message_id)
        if not updates:
            # Queue drained — stop polling (previously looped forever).
            break
        for update in updates:
            message = update.get('message', {})
            message_id = message.get('message_id')
            text = message.get('text')
            # process the message as needed
            print(f'Received message {message_id}: {text}')
            if message_id:
                latest_message_id = message_id + 1

    # set webhook again
    url = os.environ.get('URL')
    url = "https://api.telegram.org/bot{}/setWebhook?url={}/webhook".format(bot_token, url)
    response = requests.post(url)
    response.raise_for_status()

    # Flask view functions must return a response (previously returned None).
    return 'OK'

@app.route('/setWebhook', methods=['POST'])
def set_webhook():
    """Register <url>/webhook as this bot's Telegram webhook endpoint."""
    global bot_token

    # set webhook
    url = request.form.get('url')
    url = "https://api.telegram.org/bot{}/setWebhook?url={}/webhook".format(bot_token, url)
    response = requests.post(url)
    response.raise_for_status()

    return 'OK'

if __name__ == '__main__':
    # Local run (see run.sh): register the ngrok URL from $URL as the
    # webhook, then start the Flask dev server.
    if os.environ.get('URL'):
        url = os.environ.get('URL')
        url = "https://api.telegram.org/bot{}/setWebhook?url={}/webhook".format(bot_token, url)
        response = requests.post(url)
        response.raise_for_status()

    # Start the Flask app
    app.run()