├── src ├── yikes │ ├── __init__.py │ ├── borges-and-ai.pdf │ ├── static │ │ ├── favicon.ico │ │ ├── style.css │ │ ├── okay.html │ │ └── index.html │ ├── manifesto_futurista.pdf │ ├── cli.py │ ├── summarize.py │ └── embed.py └── x_x │ ├── __init__.py │ ├── cli.py │ └── splashes │ └── sword.txt ├── .gitignore ├── pyproject.toml ├── README.md ├── tests └── x_x │ ├── cli_test.py │ └── x_x_test.py ├── setup.cfg ├── LICENSE └── UNLICENSE /src/yikes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.swp 3 | .DS_Store 4 | __pycache__ 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /src/yikes/borges-and-ai.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whatever/call-me-fred-flintstone/main/src/yikes/borges-and-ai.pdf -------------------------------------------------------------------------------- /src/yikes/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whatever/call-me-fred-flintstone/main/src/yikes/static/favicon.ico -------------------------------------------------------------------------------- /src/yikes/manifesto_futurista.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whatever/call-me-fred-flintstone/main/src/yikes/manifesto_futurista.pdf 
-------------------------------------------------------------------------------- /src/yikes/static/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: sans-serif; 3 | font-size: 10pt; 4 | } 5 | 6 | #preview-image { 7 | max-width: 300px 8 | } 9 | -------------------------------------------------------------------------------- /src/yikes/static/okay.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | let it go 7 | 8 | 9 |
10 | 12 | 13 | 14 | 16 | 17 | 19 |
20 | 21 | 22 | -------------------------------------------------------------------------------- /src/x_x/__init__.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | EYES = [ 5 | "x", 6 | "X", 7 | "O", 8 | "o", 9 | "0", 10 | "U", 11 | "u", 12 | "@", 13 | "*", 14 | "^", 15 | "-", 16 | ] 17 | 18 | 19 | def x_x(): 20 | """Return a random face with an eye.""" 21 | return random.choice(EYES) + "_" + random.choice(EYES) 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # :cake: call me fred flintstone (i can make your bed rock) 2 | 3 | nonsense project to start integrating bedrock stuff into a webapp 4 | 5 | 6 | ## Install 7 | 8 | ```bash 9 | pip3 install -e . 10 | pip3 install -e ".[dev]" 11 | ``` 12 | 13 | 14 | ## Test 15 | 16 | WIP: Should this be tox nowadays? 17 | 18 | ```bash 19 | pytest 20 | ``` 21 | 22 | 23 | ## Lint 24 | 25 | WIP: What should this be? 
26 | 27 | ```bash 28 | flake8 src 29 | ``` 30 | -------------------------------------------------------------------------------- /tests/x_x/cli_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from x_x.cli import ( 4 | main, 5 | splash, 6 | ) 7 | 8 | 9 | class TestCLI(unittest.TestCase): 10 | """Test that the CLI tool still works""" 11 | 12 | def test_splash(self): 13 | """Test whether the splash screen returns""" 14 | 15 | self.assertTrue(splash("sword")) 16 | 17 | def test_main(self): 18 | """Test whether the main function runs""" 19 | 20 | main() 21 | -------------------------------------------------------------------------------- /tests/x_x/x_x_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | from x_x import x_x 5 | 6 | 7 | class TestX_X(unittest.TestCase): 8 | """Test x_x""" 9 | 10 | def test_x_x_structure(self): 11 | """Test whether x_x() is structured correctly.""" 12 | 13 | for i in range(1000): 14 | smiley = x_x() 15 | self.assertEqual(len(smiley), 3) 16 | self.assertEqual(smiley[1], "_") 17 | self.assertIn(smiley[0], "xXOo0Uu@*^-") 18 | self.assertIn(smiley[2], "xXOo0Uu@*^-") 19 | 20 | 21 | if __name__ == "__main__": 22 | unittest.main() 23 | -------------------------------------------------------------------------------- /src/x_x/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path 3 | 4 | 5 | from x_x import x_x 6 | 7 | 8 | def splash(choice): 9 | """Return a splash screen""" 10 | 11 | fname = os.path.join( 12 | os.path.dirname(__file__), 13 | "splashes", 14 | f"{choice}.txt", 15 | ) 16 | 17 | with open(fname, "r") as fi: 18 | return fi.read() 19 | 20 | 21 | def main(): 22 | """CLI stub""" 23 | 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument("--splash", action="store_true", help="show splash screen") 26 | args = 
parser.parse_args() 27 | 28 | if args.splash: 29 | print(splash("sword")) 30 | else: 31 | print(x_x()) 32 | -------------------------------------------------------------------------------- /src/x_x/splashes/sword.txt: -------------------------------------------------------------------------------- 1 | ,--. 2 | { } 3 | K, } 4 | / ~Y` 5 | , / / 6 | {_'-K.__/ 7 | `/-.__L._ 8 | / ' /`\_} 9 | / ' / 10 | ____ / ' / 11 | ,-'~~~~ ~~/ ' /_ 12 | ,' ``~~~ ', 13 | ( Y 14 | { I 15 | { - `, 16 | | ', ) 17 | | | ,..__ __. Y 18 | | .,_./ Y ' / ^Y J )| 19 | \ |' / | | || 20 | \ L_/ . _ (_,.'( 21 | \, , ^^""' / | ) 22 | \_ \ /,L] / 23 | '-_~-, ` ` ./` 24 | `'{_ ) 25 | ^^\..___,.--` ZEUS 26 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = x_x 3 | version = 1.0.0 4 | author = Matt <3 5 | author_email = matt@worldshadowgovernment.com 6 | url = https://github.com/whatever/x_x 7 | description = wip 8 | long_description = file: README.md 9 | long_description_content_type = text/markdown 10 | keywords = x_x 11 | license = UNLICENSE 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | License :: UNLICENSE 15 | 16 | [options] 17 | package_dir = 18 | = src 19 | packages = find: 20 | install_requires = 21 | aiohttp >= 3.0.0 22 | boto3 >= 1.0.0 23 | beautifulsoup4 >= 4.0.0 24 | requests >= 2.0.0 25 | 26 | [options.packages.find] 27 | where = src 28 | exclude = 29 | examples* 30 | tools* 31 | docs* 32 | x_x.tests* 33 | 34 | [options.entry_points] 35 | console_scripts = 36 | x_x = x_x.cli:main 37 | borges-conversation = yikes.cli:main 38 | 39 | [options.package_data] 40 | * = README.md 41 | 42 | [options.extras_require] 43 | dev = 44 | pytest >= 7.0.0 45 | flake8 >= 7.0.0 46 | 47 | [flake8] 48 | max-line-length = 120 49 | 50 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 
7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /src/yikes/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | 18 | 19 |

Punch in a URL and scrape it (:

20 |
21 | 22 |
23 | 24 |
25 |
26 |
27 | 28 |
29 |

Results

30 | 31 | 32 |
33 |
34 | 35 |
36 |
37 | 38 | 56 |
57 | 58 | 59 | -------------------------------------------------------------------------------- /src/yikes/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import boto3 3 | import json 4 | import os 5 | from x_x.cli import splash 6 | 7 | 8 | from yikes.embed import mess_with_embedding 9 | from yikes.summarize import summarize_url 10 | 11 | 12 | from aiohttp import web 13 | 14 | 15 | HERE = os.path.realpath(os.path.dirname(__file__)) 16 | 17 | 18 | def webapp(): 19 | 20 | async def root_handler(request): 21 | return web.FileResponse(f"{HERE}/static/index.html") 22 | 23 | async def query_preview_handler(request): 24 | return web.Respose(text="Preview") 25 | 26 | async def query_handler(request): 27 | params = await request.json() 28 | query = params["query"] 29 | return web.Response(text=query_llama(query)) 30 | 31 | async def url_preview_handler(request): 32 | """Return a json response with a summary of the url""" 33 | 34 | print("preview handler") 35 | 36 | url = request.query.get("url") 37 | 38 | if not url: 39 | resp = {"error": "No URL provided"} 40 | else: 41 | resp = summarize_url(url) 42 | 43 | print("finished summarizing... 
responding") 44 | 45 | return web.json_response(resp) 46 | 47 | app = web.Application() 48 | 49 | app.add_routes([ 50 | web.get("/", root_handler), 51 | web.get("/scrape", url_preview_handler), 52 | web.post("/query", query_handler), 53 | web.get("/preview", query_preview_handler), 54 | web.static("/", f"{HERE}/static"), 55 | ]) 56 | 57 | return app 58 | 59 | 60 | 61 | 62 | def main(): 63 | """Serve website to query LLaMa""" 64 | 65 | print(splash("sword")) 66 | 67 | mess_with_embedding() 68 | 69 | return 70 | 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument("--port", type=int, default=8181) 73 | # parser.add_argument("--temperature", type=float, default=0.5) 74 | # parser.add_argument("--top-p", type=float, default=0.9) 75 | # parser.add_argument("--max-gen-len", type=int, default=2**8) 76 | args = parser.parse_args() 77 | 78 | app = webapp() 79 | 80 | web.run_app( 81 | app, 82 | print=None, 83 | port=args.port, 84 | ) 85 | -------------------------------------------------------------------------------- /src/yikes/summarize.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import json 3 | import logging 4 | import requests 5 | 6 | from bs4 import BeautifulSoup 7 | 8 | 9 | def fetch(url): 10 | """Return the HTML from a URL""" 11 | 12 | resp = requests.get(url) 13 | return resp.text 14 | 15 | 16 | def extract_text_from_html(html): 17 | """Return readable text from HTML""" 18 | 19 | resp = BeautifulSoup(html, "html.parser") 20 | 21 | for script in resp(["script", "style"]): 22 | script.extract() 23 | 24 | text = resp.get_text() 25 | 26 | lines = ( 27 | line.strip() 28 | for line in text.splitlines() 29 | ) 30 | 31 | chunks = ( 32 | phrase.strip() 33 | for line in lines 34 | for phrase in line.split(" ") 35 | ) 36 | 37 | return "\n".join( 38 | chunk 39 | for chunk in chunks 40 | if chunk 41 | if len(chunk) > 30 42 | ) 43 | 44 | 45 | def query_jurassic(prompt): 46 | """Return generated text from 
bedrock""" 47 | 48 | client = boto3.client("bedrock-runtime") 49 | 50 | prompt = "\n".join([ 51 | # "{text}Summarize this block of text with 5 bullet points:{/text}", 52 | # prompt, 53 | "The following is text from a website:", 54 | prompt, 55 | "Summarize the website with 5 bullet points:", 56 | ]) 57 | 58 | model_kwargs = { #AI21 59 | "prompt": prompt, 60 | "maxTokens": 3000, 61 | "temperature": 0.3, 62 | "topP": 0.7, 63 | "stopSequences": [], 64 | "countPenalty": {"scale": 0 }, 65 | "presencePenalty": {"scale": 0 }, 66 | "frequencyPenalty": {"scale": 0 } 67 | } 68 | 69 | resp = client.invoke_model( 70 | modelId="ai21.j2-ultra-v1", 71 | body=json.dumps(model_kwargs), 72 | ) 73 | 74 | resp = resp["body"].read().decode("utf-8") 75 | resp = json.loads(resp) 76 | 77 | result = "" 78 | 79 | for chunk in resp["completions"]: 80 | result += chunk["data"]["text"] 81 | 82 | return result 83 | 84 | 85 | from collections import namedtuple 86 | 87 | Summarization = namedtuple( 88 | "Summarization", 89 | ["url", "title", "image", "summary"], 90 | ) 91 | 92 | logging.basicConfig() 93 | logger = logging.getLogger(__name__) 94 | logger.setLevel(logging.INFO) 95 | 96 | 97 | def summarize_url(url): 98 | """Return a text summary of a URL""" 99 | 100 | logger.info("fetching %url", url) 101 | html = fetch(url) 102 | 103 | logger.info("extracting text from HTML") 104 | text = extract_text_from_html(html) 105 | 106 | logger.info("summarizing text with bedrock") 107 | summary = query_jurassic(text[:10_000]).strip() 108 | 109 | soup = BeautifulSoup(html, "html.parser") 110 | title = soup.find("meta", property="og:title") 111 | image = soup.find("meta", property="og:image") 112 | description = soup.find("meta", property="og:description") 113 | 114 | return { 115 | "url": url, 116 | "title": title["content"] if title else "n/a", 117 | "image": image["content"] if image else None, 118 | "description": description["content"] if description else "n/a", 119 | "summary": summary, 120 | } 121 | 
-------------------------------------------------------------------------------- /src/yikes/embed.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | 3 | from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler 4 | 5 | from langchain.chains import ConversationalRetrievalChain 6 | from langchain.indexes.vectorstore import VectorstoreIndexCreator 7 | from langchain.memory import ConversationBufferWindowMemory 8 | from langchain.text_splitter import RecursiveCharacterTextSplitter 9 | from langchain_community.document_loaders import PyPDFLoader 10 | from langchain_community.embeddings import BedrockEmbeddings 11 | from langchain_community.llms import Bedrock 12 | from langchain_community.vectorstores import FAISS 13 | 14 | import json 15 | 16 | from pathlib import Path 17 | 18 | import unittest.mock as mock 19 | import pdb 20 | 21 | import boto3 22 | import botocore 23 | # import BedrockRuntime.Client.invoke_model(**kwargs) 24 | 25 | 26 | def fullname(o): 27 | klass = o.__class__ 28 | module = klass.__module__ 29 | if module == 'builtins': 30 | return klass.__qualname__ # avoid outputs like 'builtins.str' 31 | return module + '.' 
+ klass.__qualname__ 32 | 33 | 34 | client = boto3.client("bedrock-runtime") 35 | 36 | 37 | HERE = Path(__file__).parent 38 | 39 | 40 | def get_llm(): 41 | """Return a Bedrock LLM wrapper for meta.llama2-70b-chat-v1 configured with temperature 0""" 42 | model_kwargs = { #AI21 (unused: overwritten by the Meta kwargs below) 43 | "maxTokens": 1024, 44 | "temperature": 0, 45 | "topP": 0.5, 46 | "stopSequences": ["Human:"], 47 | "countPenalty": {"scale": 0 }, 48 | "presencePenalty": {"scale": 0 }, 49 | "frequencyPenalty": {"scale": 0 }, 50 | } 51 | 52 | model_kwargs = { #Meta 53 | "temperature": 0, 54 | "top_p": 0.5, 55 | "max_gen_len": 2**7, 56 | } 57 | 58 | return Bedrock( 59 | # model_id="ai21.j2-ultra-v1", 60 | # model_id="meta.llama2-70b-v1", 61 | model_id="meta.llama2-70b-chat-v1", 62 | model_kwargs=model_kwargs, 63 | # streaming=True, 64 | # callbacks=[StreamingStdOutCallbackHandler()], 65 | ) 66 | 67 | 68 | def get_index(): 69 | """Return a FAISS-backed vector index built from the bundled borges-and-ai.pdf using Bedrock embeddings""" 70 | 71 | embeddings = BedrockEmbeddings() 72 | 73 | pdf_path = str(HERE / "borges-and-ai.pdf") 74 | # pdf_path = str(HERE / "manifesto_futurista.pdf") 75 | 76 | loader = PyPDFLoader(file_path=pdf_path) 77 | 78 | text_splitter = RecursiveCharacterTextSplitter( 79 | separators=["\n\n", "\n", ".", " "], 80 | chunk_size=1000, 81 | chunk_overlap=100, 82 | ) 83 | 84 | index_creator = VectorstoreIndexCreator( 85 | vectorstore_cls=FAISS, 86 | embedding=embeddings, 87 | text_splitter=text_splitter, 88 | ) 89 | 90 | return index_creator.from_loaders([loader]) 91 | 92 | 93 | def get_memory(): 94 | """Return a ConversationBufferWindowMemory that stores messages under the 'chat_history' key""" 95 | 96 | return ConversationBufferWindowMemory( 97 | memory_key="chat_history", 98 | return_messages=True, 99 | ) 100 | 101 | 102 | 103 | def MUTATE_invoke_model(client): 104 | """ 105 | Modify the boto3 client to print out the prompt and response. 106 | NOTE: This is a hacky way to do this. It would be better if we could use a callback handler passed into Bedrock... 
107 | """ 108 | def inspect_boto_calls(*args, **kwargs): 109 | body = kwargs["body"] 110 | body = json.loads(body) 111 | print_aside(kwargs["modelId"]) 112 | print_aside(body["prompt"]) 113 | 114 | resp = client.__invoke_model(*args, **kwargs) 115 | resp_body = resp["body"].read() 116 | resp["body"].read = mock.MagicMock(return_value=resp_body) 117 | 118 | try: 119 | r = json.loads(resp_body.decode("utf8"))["generation"] 120 | # r = json.loads(resp_body.decode("utf-8"))["completions"][0]["data"]["text"] 121 | print_aside(r) 122 | except: 123 | print_aside(resp_body) 124 | 125 | return resp # client.__invoke_model(*args, **kwargs) 126 | client.__invoke_model = client.invoke_model 127 | client.invoke_model = inspect_boto_calls 128 | 129 | TICKER = 0 130 | COLORS = ["94", "96", "92"] 131 | 132 | def print_aside(x): 133 | global TICKER 134 | color = COLORS[TICKER % len(COLORS)] 135 | TICKER += 1 136 | print(f"\n\033[{color}m{x}\033[0m\n") 137 | 138 | 139 | def get_rag_chat_response(input_text, memory, index): 140 | 141 | llm = get_llm() 142 | 143 | def get_rag_chat_response(input_text, memory, index): 144 | 145 | llm = get_llm() 146 | 147 | # Modify the llm's boto3 client to print out the prompt and response 148 | MUTATE_invoke_model(llm.client) 149 | 150 | conversation_with_retrieval = ConversationalRetrievalChain.from_llm( 151 | llm, 152 | index.vectorstore.as_retriever(), 153 | memory=memory, 154 | ) 155 | 156 | chat_response = conversation_with_retrieval.invoke({"question": input_text}) 157 | 158 | return chat_response 159 | 160 | 161 | def mess_with_embedding(): 162 | 163 | 164 | """ 165 | bedrock = boto3.client(service_name='bedrock', region_name='us-east-1') 166 | listModels = bedrock.list_foundation_models(byProvider='meta') 167 | print(json.dumps(listModels, indent=2)) 168 | return 169 | """ 170 | 171 | mem = get_memory() 172 | 173 | ind = get_index() 174 | 175 | try: 176 | while True: 177 | print("\033[31m>\033[0m", end="", flush=True) 178 | line = input(" 
").strip() 179 | 180 | if not line: 181 | continue 182 | 183 | res = get_rag_chat_response(line, mem, ind) 184 | 185 | print() 186 | print(res["answer"]) 187 | print() 188 | 189 | except (KeyboardInterrupt, EOFError): 190 | print("Exiting...") 191 | raise SystemExit(0) 192 | --------------------------------------------------------------------------------