├── .devcontainer
│   └── devcontainer.json
├── .gitignore
├── LICENSE
├── README.md
├── helper
│   └── helper.py
├── notebooks
│   ├── Text Summary based on sumy lib.ipynb
│   ├── Text Summary using Langchain.ipynb
│   ├── Text summary based on transformers.ipynb
│   ├── Text summary based on training a pre existing model.ipynb
│   └── Text summary using python.ipynb
├── python_algo.py
├── requirements.txt
├── streamlit.py
├── sumy_lib_based_summary.py
└── transformers_based_summary.py

/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "Python 3",
 3 |   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
 4 |   "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
 5 |   "customizations": {
 6 |     "codespaces": {
 7 |       "openFiles": [
 8 |         "README.md",
 9 |         "streamlit.py"
10 |       ]
11 |     },
12 |     "vscode": {
13 |       "settings": {},
14 |       "extensions": [
15 |         "ms-python.python",
16 |         "ms-python.vscode-pylance"
17 |       ]
18 |     }
19 |   },
20 |   "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y str:
 2 |     """Function to convert text to lowercase
 3 |     Arguments:
 4 |         text (str): Text to be converted
 5 |     Returns:
 6 |         str: The text converted to lowercase
 7 |     """
 8 |     return text.lower()
 9 | 
--------------------------------------------------------------------------------
/notebooks/Text Summary based on sumy lib.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"cell_type":"markdown","metadata":{"id":"sYk6tEloRnnA"},"source":["Sumy: a Python library for text summarization. Among others, it provides:\n","\n","1. LexRank summarization, which is based on graph-based sentence ranking,\n","2. Luhn, which is based on the frequency of the most important words, and\n","3. 
TextRank, which is based on graph-based text ranking.\n","\n"]},{"cell_type":"code","execution_count":14,"metadata":{"executionInfo":{"elapsed":2,"status":"ok","timestamp":1693648496298,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"},"user_tz":-330},"id":"rMNa0l8uSX2a"},"outputs":[],"source":["# import and install sumy\n","# !pip install sumy\n","from sumy.summarizers.lex_rank import LexRankSummarizer\n","from sumy.parsers.plaintext import PlaintextParser\n","from sumy.nlp.tokenizers import Tokenizer # we could also use word_tokenize from nltk.tokenize\n"]},{"cell_type":"markdown","metadata":{"id":"ef5oJGs7S8nK"},"source":["Now we will take the same text that we used in the Python-based summary notebook"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":8,"status":"ok","timestamp":1693648378545,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"},"user_tz":-330},"id":"l8Udz8oeT0t5","outputId":"4220490e-f5d1-4d59-8a6a-0165090ff69a"},"outputs":[{"name":"stderr","output_type":"stream","text":["[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data] Package punkt is already up-to-date!\n"]},{"data":{"text/plain":["True"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["import nltk\n","nltk.download('punkt')"]},{"cell_type":"code","execution_count":17,"metadata":{"executionInfo":{"elapsed":5,"status":"ok","timestamp":1693648549144,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"},"user_tz":-330},"id":"ggwNg0bXSn9q"},"outputs":[],"source":["# text: you can change this based on your requirements\n","text = \"\"\"\n","Artificial Intelligence (AI) is a multidisciplinary field of computer science and engineering that seeks to create intelligent systems capable of mimicking human cognitive functions. At its core, AI is the science of developing algorithms, models, and systems that can perform tasks that typically require human intelligence. These tasks encompass a wide range of activities, from problem-solving and decision-making to understanding natural language, recognizing patterns, and learning from data.\n","\n","One of the foundational principles of AI is the concept of machine learning, wherein AI systems can improve their performance on a specific task through exposure to data and experience, rather than relying solely on explicit programming. Machine learning algorithms enable AI systems to identify patterns, make predictions, and adapt to changing conditions, making them incredibly versatile and applicable across various domains.\n","\n","AI can be categorized into two broad types:\n","\n","Narrow or Weak AI: This type of AI is designed to excel at specific tasks or domains. Examples include voice assistants like Siri or recommendation systems on streaming platforms. While they may seem intelligent within their specialized scope, they lack general intelligence and self-awareness.\n","\n","General or Strong AI: This represents the aspiration of creating AI systems with human-level intelligence and cognitive abilities. General AI would possess the capacity to understand, learn, and adapt across a wide range of tasks and contexts, similar to human intelligence. Achieving true general AI remains a long-term goal and a subject of ongoing research.\n","\n","AI technologies encompass a diverse set of techniques and approaches, including neural networks, natural language processing, computer vision, robotics, and reinforcement learning, among others. These tools find applications across numerous sectors, such as healthcare (diagnosis and treatment optimization), finance (algorithmic trading and fraud detection), transportation (autonomous vehicles), and entertainment (video game AI and content recommendation).\n","\n","While AI holds immense promise, it also raises ethical and societal questions related to privacy, bias, transparency, and the potential impact on the workforce. As AI continues to advance, it is crucial to ensure responsible development and deployment, guided by ethical principles and a commitment to harnessing its potential for the betterment of society. In essence, AI represents a powerful and transformative force in the modern world, with the potential to reshape industries, improve efficiency, and enhance our understanding of intelligence and cognition.\n","\"\"\""]},{"cell_type":"markdown","metadata":{"id":"3P0YAtqXU5nD"},"source":["### Create a parser for the model, as it needs the data in sentence form"]},{"cell_type":"code","execution_count":20,"metadata":{"executionInfo":{"elapsed":6,"status":"ok","timestamp":1693648560103,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"},"user_tz":-330},"id":"oF358uWBUMgt"},"outputs":[],"source":["text = text.lower() # convert to lower case so that capitalized words are treated the same as lower-case ones\n","parser = PlaintextParser.from_string(text, Tokenizer('english'))"]},{"cell_type":"code","execution_count":21,"metadata":{"executionInfo":{"elapsed":1,"status":"ok","timestamp":1693648562243,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"},"user_tz":-330},"id":"Joy67L9HTMS4"},"outputs":[],"source":["summarizer_lex = LexRankSummarizer() # create the LexRank summarizer object"]},{"cell_type":"code","execution_count":30,"metadata":{"executionInfo":{"elapsed":6,"status":"ok","timestamp":1693648782315,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"},"user_tz":-330},"id":"QWJKHqzaTYoQ"},"outputs":[],"source":["summary = summarizer_lex(parser.document, 2) # summarize the document; the second argument is how many sentences you want in the output"]},{"cell_type":"code","execution_count":31,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2,"status":"ok","timestamp":1693648783779,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"},"user_tz":-330},"id":"QqE4_aaOT6-v","outputId":"69694a31-6633-44cd-a27c-2fed8c5e56a2"},"outputs":[{"name":"stdout","output_type":"stream","text":["at its core, ai is the science of developing algorithms, models, and systems that can perform tasks that typically require human intelligence.\n","these tasks encompass a wide range of activities, from problem-solving and decision-making to understanding natural language, recognizing patterns, and learning from data.\n"]}],"source":["for sentence in summary:\n"," print(sentence)"]},{"cell_type":"markdown","metadata":{"id":"R1WTX1xtVaWc"},"source":["Cool, right? The summary is made of sentences taken directly from the text. Let's give the same input to other models as well and see what they produce.\n","\n","LSA (Latent Semantic Analysis): it combines a frequency technique with singular value decomposition to summarize texts"]},{"cell_type":"code","execution_count":32,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":7,"status":"ok","timestamp":1693649057982,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"},"user_tz":-330},"id":"eya38BsTTlK2","outputId":"974e5a51-0722-4ea4-b0f2-841fff8e7fe3"},"outputs":[{"name":"stdout","output_type":"stream","text":["machine learning algorithms enable ai systems to identify patterns, make predictions, and adapt to changing conditions, making them incredibly versatile and applicable across various domains.\n","examples include voice assistants like siri or recommendation systems on streaming platforms.\n"]}],"source":["from sumy.summarizers.lsa import LsaSummarizer\n","lsa_summarizer = LsaSummarizer()\n","summary = lsa_summarizer(parser.document, 2)\n","for sentence in summary:\n"," print(sentence)"]},{"cell_type":"markdown","metadata":{"id":"hEsgK9GUWfHg"},"source":["Now the summary is different and somewhat more accurate than LexRank's.\n","\n","Now try **TextRank**"]},{"cell_type":"code","execution_count":33,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":6,"status":"ok","timestamp":1693649196642,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"},"user_tz":-330},"id":"28HRVkZuT-dK","outputId":"118e6ba5-0c3a-4a2c-a575-4b050d1f48af"},"outputs":[{"name":"stdout","output_type":"stream","text":["one of the foundational principles of ai is the concept of machine learning, wherein ai systems can improve their performance on a specific task through exposure to data and experience, rather than relying solely on explicit programming.\n","in essence, ai represents a powerful and transformative force in the modern world, with the potential to reshape industries, improve efficiency, and enhance our understanding of intelligence and cognition.\n"]}],"source":["from sumy.summarizers.text_rank import TextRankSummarizer\n","textrank_summarizer = TextRankSummarizer()\n","summary = textrank_summarizer(parser.document, 2)\n","for sentence in summary:\n"," print(sentence)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"iz_F7i07W8_l"},"outputs":[],"source":[]}],"metadata":{"colab":{"authorship_tag":"ABX9TyMLz6oobd5O341d52n8YkKF","provenance":[]},"kernelspec":{"display_name":"Python 
3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /notebooks/Text Summary using Langchain.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyOgukEd6Iw57UpcZYasDrrr"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"lWi9mhQMKaAp","executionInfo":{"status":"ok","timestamp":1693679586198,"user_tz":-330,"elapsed":46233,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"e332b82a-4cf8-4461-ceef-639e5d2ce93c"},"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting openai\n"," Downloading openai-0.28.0-py3-none-any.whl (76 kB)\n","\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/76.5 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.5/76.5 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting tiktoken\n"," Downloading tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m14.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting chromadb\n"," Downloading chromadb-0.4.8-py3-none-any.whl (418 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m418.3/418.3 kB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting langchain\n"," Downloading langchain-0.0.279-py3-none-any.whl (1.6 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m26.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: requests>=2.20 in /usr/local/lib/python3.10/dist-packages (from openai) (2.31.0)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai) (4.66.1)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from openai) (3.8.5)\n","Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2023.6.3)\n","Collecting pydantic<2.0,>=1.9 (from chromadb)\n"," Downloading pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m38.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting chroma-hnswlib==0.7.2 (from chromadb)\n"," Downloading chroma-hnswlib-0.7.2.tar.gz (31 kB)\n"," Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n"," Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n"," Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","Collecting fastapi<0.100.0,>=0.95.2 (from chromadb)\n"," Downloading fastapi-0.99.1-py3-none-any.whl (58 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.4/58.4 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting uvicorn[standard]>=0.18.3 (from chromadb)\n"," Downloading uvicorn-0.23.2-py3-none-any.whl (59 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy>=1.21.6 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.23.5)\n","Collecting posthog>=2.4.0 (from chromadb)\n"," Downloading posthog-3.0.2-py2.py3-none-any.whl (37 kB)\n","Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.7.1)\n","Collecting pulsar-client>=3.1.0 (from chromadb)\n"," Downloading pulsar_client-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m39.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting onnxruntime>=1.14.1 (from chromadb)\n"," Downloading onnxruntime-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m47.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting tokenizers>=0.13.2 (from chromadb)\n"," Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m109.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting pypika>=0.48.9 (from chromadb)\n"," Downloading PyPika-0.48.9.tar.gz (67 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n"," Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n"," Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","Collecting overrides>=7.3.1 (from chromadb)\n"," Downloading overrides-7.4.0-py3-none-any.whl (17 kB)\n","Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from chromadb) (6.0.1)\n","Collecting bcrypt>=4.0.1 (from chromadb)\n"," Downloading bcrypt-4.0.1-cp36-abi3-manylinux_2_28_x86_64.whl (593 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m593.7/593.7 kB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n","Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.20)\n","Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n","Collecting dataclasses-json<0.6.0,>=0.5.7 (from langchain)\n"," Downloading dataclasses_json-0.5.14-py3-none-any.whl (26 kB)\n","Collecting langsmith<0.1.0,>=0.0.21 (from langchain)\n"," Downloading langsmith-0.0.33-py3-none-any.whl (36 kB)\n","Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.8.5)\n","Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.2.3)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (23.1.0)\n","Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (3.2.0)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (6.0.4)\n","Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (1.9.2)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (1.4.0)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai) (1.3.1)\n","Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.6.0,>=0.5.7->langchain)\n"," Downloading marshmallow-3.20.1-py3-none-any.whl (49 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.6.0,>=0.5.7->langchain)\n"," Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n","Collecting starlette<0.28.0,>=0.27.0 (from fastapi<0.100.0,>=0.95.2->chromadb)\n"," Downloading starlette-0.27.0-py3-none-any.whl (66 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting coloredlogs (from onnxruntime>=1.14.1->chromadb)\n"," Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (23.5.26)\n","Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) 
(23.1)\n","Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (3.20.3)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n","Collecting monotonic>=1.5 (from posthog>=2.4.0->chromadb)\n"," Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n","Collecting backoff>=1.10.0 (from posthog>=2.4.0->chromadb)\n"," Downloading backoff-2.2.1-py3-none-any.whl (15 kB)\n","Requirement already satisfied: python-dateutil>2.1 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb) (2.8.2)\n","Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from pulsar-client>=3.1.0->chromadb) (2023.7.22)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai) (2.0.4)\n","Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (2.0.2)\n","Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (8.1.7)\n","Collecting h11>=0.8 (from uvicorn[standard]>=0.18.3->chromadb)\n"," Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting httptools>=0.5.0 (from uvicorn[standard]>=0.18.3->chromadb)\n"," Downloading httptools-0.6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (428 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m428.8/428.8 kB\u001b[0m \u001b[31m47.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting python-dotenv>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n"," Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n","Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.18.3->chromadb)\n"," Downloading uvloop-0.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.1/4.1 MB\u001b[0m \u001b[31m120.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting watchfiles>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n"," Downloading watchfiles-0.20.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m98.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting websockets>=10.4 (from uvicorn[standard]>=0.18.3->chromadb)\n"," Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from 
starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (3.7.1)\n","Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain)\n"," Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n","Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime>=1.14.1->chromadb)\n"," Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n","Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.3.0)\n","Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb) (1.1.3)\n","Building wheels for collected packages: chroma-hnswlib, pypika\n"," Building wheel for chroma-hnswlib (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for chroma-hnswlib: filename=chroma_hnswlib-0.7.2-cp310-cp310-linux_x86_64.whl size=2285752 sha256=1de8b66a5109f45b14640091e6f6112c255a86e1497c3b7c9f6f561a9080daf3\n"," Stored in directory: /root/.cache/pip/wheels/11/2b/0d/ee457f6782f75315bb5828d5c2dc5639d471afbd44a830b9dc\n"," Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for pypika: filename=PyPika-0.48.9-py2.py3-none-any.whl size=53723 sha256=af30944566cf57ad8125b31150c42838dae7fd0a2efbcfed6a334eeea00fb1dc\n"," Stored in directory: /root/.cache/pip/wheels/e1/26/51/d0bffb3d2fd82256676d7ad3003faea3bd6dddc9577af665f4\n","Successfully built chroma-hnswlib pypika\n","Installing collected packages: tokenizers, pypika, monotonic, websockets, uvloop, python-dotenv, pydantic, pulsar-client, overrides, mypy-extensions, marshmallow, humanfriendly, httptools, h11, chroma-hnswlib, bcrypt, backoff, watchfiles, uvicorn, typing-inspect, tiktoken, starlette, posthog, langsmith, coloredlogs, openai, onnxruntime, fastapi, dataclasses-json, langchain, chromadb\n"," Attempting uninstall: pydantic\n"," Found existing installation: pydantic 2.2.1\n"," Uninstalling pydantic-2.2.1:\n"," Successfully uninstalled pydantic-2.2.1\n","Successfully installed backoff-2.2.1 bcrypt-4.0.1 chroma-hnswlib-0.7.2 chromadb-0.4.8 coloredlogs-15.0.1 dataclasses-json-0.5.14 fastapi-0.99.1 h11-0.14.0 httptools-0.6.0 humanfriendly-10.0 langchain-0.0.279 langsmith-0.0.33 marshmallow-3.20.1 monotonic-1.6 mypy-extensions-1.0.0 onnxruntime-1.15.1 openai-0.28.0 overrides-7.4.0 posthog-3.0.2 pulsar-client-3.3.0 pydantic-1.10.12 pypika-0.48.9 python-dotenv-1.0.0 starlette-0.27.0 tiktoken-0.4.0 tokenizers-0.13.3 typing-inspect-0.9.0 uvicorn-0.23.2 uvloop-0.17.0 watchfiles-0.20.0 websockets-11.0.3\n"]}],"source":["!pip install openai tiktoken chromadb langchain"]},{"cell_type":"markdown","source":["NOTE: save .env file to save secerts key\n","\n","OPENAI_API_KEY = API_KEY"],"metadata":{"id":"HutUIVhnLhpr"}},{"cell_type":"code","source":["from langchain.chat_models import ChatOpenAI\n","from langchain.document_loaders import WebBaseLoader\n","from langchain.chains.summarize import 
load_summarize_chain"],"metadata":{"id":"XYFhqPfJKnmE","executionInfo":{"status":"ok","timestamp":1693679877955,"user_tz":-330,"elapsed":3,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":3,"outputs":[]},{"cell_type":"markdown","source":["Load web page data"],"metadata":{"id":"hnM9RLz-MOH_"}},{"cell_type":"code","source":["loader = WebBaseLoader(\"https://www.hongkiat.com/blog/write-blog-summary/\")\n","docs = loader.load()"],"metadata":{"id":"ZZtO7EUIL8_3","executionInfo":{"status":"ok","timestamp":1693680433245,"user_tz":-330,"elapsed":1450,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":10,"outputs":[]},{"cell_type":"code","source":["docs"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"DyANv4ntOHZy","executionInfo":{"status":"ok","timestamp":1693680438067,"user_tz":-330,"elapsed":2,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"7cbeb2c7-1dda-43f8-cf94-d7c8cddf30bb"},"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[Document(page_content=\"\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nHow to Write the Summary of a Blog (5 Best Ways) - Hongkiat\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPlease enable JavaScript in your browser to enjoy a better experience.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nHongkiat\\n\\n\\n\\n\\nMain Menu\\n\\n\\nDesign / Dev \\n\\n\\nTechnology \\n\\n\\nInspiration \\n\\n\\nSocial Commerce \\n\\n\\n\\nAll \\n\\n\\nDeals\\n\\n\\n\\n\\n\\n\\n\\nSearch Hongkiat for:\\n\\n\\n\\n\\n\\n\\nReveal Search Form\\n\\n\\n\\n\\nReveal Off-canvas Navigation\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nHow to Write the Summary of a Blog (5 Best Ways) \\nBy Jessica Charles. in Blogging. Updated on\\xa0December 7, 2022. \\n\\n\\n\\n\\nToday, content requirements are much more intricate than they were a few years ago. For instance, blogs today need promotion on various platforms to generate organic traffic. Thus, a key aspect of a startup blog content is a good summary.\\nToday, we’re going to talk about the best ways you can write a summary for your blog. So, let’s dig in deeper.\\nWhy Blog Summary is Important?\\nA blog summary is a vital aspect of the content. It can accompany your primary text on many platforms online, and it can be an important draw for your target audience. So, if you wish to promote your content on social media, then a summary is the most important glimpse of your main content.\\nMoreover, suppose you’ll email your target audience about your blog. In that case, a small summary of your blog’s contents should suffice to attract them.\\n5 Ways to Write a Good Blog Summary\\nWriting a good summary takes a lot of effort and various aspects into it. Whether you do it manually or you get help from a summarizing tool (like this one), you must understand a few things to write a good summary.\\nIn this section, we’re talking about the five best ways to learn to write a good summary. So, let’s get started.\\n\\n1. Read Profusely and Take Notes\\nA good summary isn’t an extension but rather an overview. It’s a plot synopsis of your main story, which allows the reader to understand what the main content entails. It’s not meant to overtake but extend the reach of your primary content.\\nThat’s why it’s important that you read your content, then read it again. 
This way, you will allow yourself to comprehend the idea. Now, reading should be done with a purpose. Ask yourself a few questions, such as:\\n\\nWhich sections are more important?\\nWhich section requires more attention?\\nWhich section should lead the summary?\\nWhere is the bulk of your argument?\\n\\nThe focus should be on that particular section. Even though filler content is necessary to avoid, it’s not always the case in blogs. So, avoid using filler content as a part of your summary. And the only way to do that is by reading your content repeatedly.\\n2. Talk About Research & Importance\\nEvery content piece has a specific method behind it. It could be about technical aspects or general – the importance of research behind it never fades. That’s why your reader must know why the research was important in the first place.\\nHere’s an example of NASA (image below), one of the biggest organizations in the world, summarizing a blog for social media captions:\\n\\nIn this caption, it mentions something that’s in detail in the main article. However, it gives you a glimpse of how it uses the four images of Europa captured by the Juno Spacecraft. Then, it just gives us surface-level information about the “Valuable views” of this moon.\\nWhat it tells us is the research and importance behind it. Your summary needs to feature that. In blogs, it’s not always easy to overlay this information. But, if you link your blog in an email or on social media, you will provide this information.\\nThe only way to do that is by understanding the basic methods in the main content beforehand.\\n3. Condense Larger Ideas into Surface-Level Sentences\\nA summary is a brief report of your content. The keyword here is “brief,” but that doesn’t mean it cannot be detailed. Now, it can be difficult to do, as not all summaries are written equally. Some summary requirements are less than a hundred words.\\nWhile other summaries are almost equal to the primary content. In blogs, it’s more of the former than the latter. So, to achieve a specific length, you should condense larger ideas into surface-level sentences.\\nFor example, you must summarize the outcry over inflation in 50 states. But, the information cannot be covered in a few words. So, how do you write a sentence-based summary?\\nHere’s an example:\\n\\nThe outcry over the inflation is thoroughly covered by research and helps us “understand the causes behind it,Author Name\\n\\nThe underlined idea is the fundamental idea in the blog. Whereas the rest of the sentence is a summary of the larger idea at work. And the author’s name in the bold text tells us that this person wrote it.\\nSo, this kind of summary doesn’t only draw attention; it makes the reader want to read more. Plus, it’s much more suitable for a blog’s homepage, social media, or email.\\n4. For Each Section, Write a Sentence\\nNot all summaries are one-sentence glimpses. Instead, some summaries need to be longer to effectively capture an idea. That’s one of the main reasons that each section needs a separate sentence to cover all.\\nLet’s divide this into a simple equation. So, let’s say your content is around a thousand words. And you’re supposed to write a hundred-word summary.\\nHow do you do it? By summarizing each 100-words within 10-words.\\n\\nSo, you don’t only have the 10% of the original content but also a summary of all the important information within the article. But, it’s important to skip the introduction and conclusion from this equation.\\n5. 
Revise or Use a Summarizing Tool to Achieve Specific Length\\nThe final step you’ll take is to revise or summarize the rest of your content using a summarizing tool. But why is using a summarizing tool important? Because it can help you achieve a specific length for your summary with options like these:\\n\\nAs mentioned, some summaries must be longer than usual. Thus, with an option like this, a summarizing tool can make the job easier. Besides, if you write a summary manually and it doesn’t meet the word count, this process can also help you condense it.\\nConclusion\\nThese are the five ways to learn to write good blog summaries. Not only do you need manual input, but also the help of viable summarizing tools. So, it’s important that you read your content until you grasp the idea and then write the summary according to your understanding.\\n\\n\\n\\n\\n\\n\\n\\n\\nYou might also like\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n10 Reasons to Keep Blogging\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nCrafting Better Content for Your Startup's Blog\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n3 Common Mistakes in Guest Blogging to Avoid\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nMarsEdit: Offline WordPress Editor For Mac (Review)\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n5 Things to Do with Your Rejected Guest Post Pitch\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBuild a GitHub-Hosted Blog with Jekyll Now\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBlogging Advice: The Good, The Bad & The Ugly\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nHow to Blog Using Evernote\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nHow to Write Engaging Articles\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nThe 7 Sins Of Guest Blogging (Based On True Events)\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nHow Guest Posts May Derail Your Google Ranking\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n8 Tips to Getting Your Guest Post Published\\n\\nBlogging \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nHongkiat.com (HKDC). All Rights Reserved. 2023\\nReproduction of materials found on this site, in any form, without explicit permission is prohibited. Publishing policy ‐ Privacy Policy\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nClose Search\\n\\n\\n\\n\\nSearch Hongkiat\\n\\n\\n\\n\\n\\n\\nWebsiteFacebookTwitterInstagramPinterestLinkedInGoogle+YoutubeRedditDribbbleBehanceGithubCodePenWhatsappEmail\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\", metadata={'source': 'https://www.hongkiat.com/blog/write-blog-summary/', 'title': 'How to Write the Summary of a Blog (5 Best Ways) - Hongkiat', 'description': 'Today, content requirements are much more intricate than they were a few years ago. 
For instance, blogs today need promotion on various platforms to', 'language': 'en-US'})]"]},"metadata":{},"execution_count":11}]},{"cell_type":"code","source":["llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo-16k', openai_api_key='XXX')"],"metadata":{"id":"4l1oqy54MLZ7","executionInfo":{"status":"ok","timestamp":1693680552803,"user_tz":-330,"elapsed":6,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":18,"outputs":[]},{"cell_type":"code","source":["chain = load_summarize_chain(llm, chain_type=\"stuff\")"],"metadata":{"id":"o6NIM8-uMMXw","executionInfo":{"status":"ok","timestamp":1693680554503,"user_tz":-330,"elapsed":6,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":19,"outputs":[]},{"cell_type":"code","source":["chain.run(docs)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":105},"id":"7Geba9ydObZA","executionInfo":{"status":"ok","timestamp":1693680559558,"user_tz":-330,"elapsed":2192,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"46cb8738-92bb-4287-8758-4a5b62e79369"},"execution_count":20,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'This blog post discusses the importance of writing a good summary for a blog and provides five ways to do so effectively. The methods include reading and taking notes, discussing research and importance, condensing larger ideas into surface-level sentences, writing a sentence for each section, and revising or using a summarizing tool to achieve a specific length. The post emphasizes the need for both manual input and the use of summarizing tools to create effective blog summaries.'"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":20}]},{"cell_type":"code","source":["from langchain.chains.llm import LLMChain\n","from langchain.prompts import PromptTemplate\n","from langchain.chains.combine_documents.stuff import StuffDocumentsChain\n","\n","# Define prompt\n","prompt_template = \"\"\"Write a concise summary of the following:\n","\"{text}\"\n","CONCISE SUMMARY:\"\"\"\n","prompt = PromptTemplate.from_template(prompt_template)\n","\n","# Define LLM chain\n","llm = ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo-16k\", openai_api_key='XXX')\n","llm_chain = LLMChain(llm=llm, prompt=prompt)\n","\n","# Define StuffDocumentsChain\n","stuff_chain = StuffDocumentsChain(\n"," llm_chain=llm_chain, document_variable_name=\"text\"\n",")\n","\n","docs = loader.load()\n","print(stuff_chain.run(docs))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"AwEmUAqGOdPv","executionInfo":{"status":"ok","timestamp":1693680631525,"user_tz":-330,"elapsed":2397,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"4f02492f-882e-4adc-fa80-272157362acf"},"execution_count":22,"outputs":[{"output_type":"stream","name":"stdout","text":["This article discusses the importance of writing a good summary for a blog and provides five ways to do so effectively. The suggested methods include reading the content thoroughly, highlighting the research and importance behind it, condensing larger ideas into surface-level sentences, writing a separate sentence for each section, and using a summarizing tool to achieve the desired length. 
The article emphasizes the need for both manual input and the assistance of summarizing tools to create compelling blog summaries.\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"oiBv9HiBOyTM","executionInfo":{"status":"ok","timestamp":1693680919222,"user_tz":-330,"elapsed":805,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":26,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"96W4dGfsO_Ls","executionInfo":{"status":"ok","timestamp":1693680919957,"user_tz":-330,"elapsed":2,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":26,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"Ytj-RwRvPGjb","executionInfo":{"status":"ok","timestamp":1693680919958,"user_tz":-330,"elapsed":2,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":26,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"nd_wZsb4PH5w"},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /notebooks/Text summary based on transformers.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyPgnp91bTBEnS5vpPS2IL2/"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"395c9e72179f4343bddf71e6c44a4c79":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_031e221b64d9422db4828586cf94ef0f","IPY_MODEL_fb15d8746d7a44c089c1ea8f5d53f3fa","IPY_MODEL_fce9fc3db0a5438eb65cd739ab87f806"],"layout":"IPY_MODEL_0fc48fa45e1c4ed49c6f8cf3bab33082"}},"031e221b64d9422db4828586cf94ef0f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7dc506a8ff1b48cc96f900cc43d35058","placeholder":"​","style":"IPY_MODEL_15024f1358fd43bf90682eb0ca974dde","value":"Downloading (…)ve/main/spiece.model: 
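The LangChain notebook above passes `openai_api_key='XXX'` directly to `ChatOpenAI`, while its markdown note recommends keeping the key in a `.env` file (`OPENAI_API_KEY = API_KEY`). Here is a minimal sketch of that pattern, assuming `python-dotenv` (which the pip output above shows being installed as a dependency) and a `.env` file in the working directory.

```python
# Sketch only: read the OpenAI key from a .env file instead of hardcoding it.
# Assumes a .env file next to the notebook containing a line like: OPENAI_API_KEY=sk-...
import os
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI

load_dotenv()  # loads the variables from .env into os.environ
llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo-16k",
    openai_api_key=os.environ["OPENAI_API_KEY"],
)
```

This keeps the key out of the notebook source, so it is not committed or shared along with the notebook's outputs.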
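The notebook uses `load_summarize_chain(llm, chain_type="stuff")`, which puts the entire page into a single prompt and therefore only works while the page fits in the model's context window. For longer pages, the same langchain 0.0.x API also offers the `map_reduce` chain type, which summarizes chunks separately and then combines the partial summaries. A hedged sketch follows; the chunk sizes are illustrative choices, not values from the notebook.

```python
# Sketch only: map_reduce summarization for pages too long for a single "stuff" prompt.
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain

loader = WebBaseLoader("https://www.hongkiat.com/blog/write-blog-summary/")
docs = loader.load()

# Split the page into overlapping chunks that each fit comfortably into one prompt.
splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
split_docs = splitter.split_documents(docs)

# ChatOpenAI falls back to the OPENAI_API_KEY environment variable if no key is passed.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
chain = load_summarize_chain(llm, chain_type="map_reduce")  # summarize each chunk, then merge
print(chain.run(split_docs))
```

`map_reduce` makes more model calls than `stuff`, but it removes the context-length ceiling on the input document.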
100%"}},"fb15d8746d7a44c089c1ea8f5d53f3fa":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_4156aedbf87148c399f7c7f0630da339","max":1912529,"min":0,"orientation":"horizontal","style":"IPY_MODEL_79874c934812433b975b71930b3d62a0","value":1912529}},"fce9fc3db0a5438eb65cd739ab87f806":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_8830acbf645a419bb733c1c61b9cfdd9","placeholder":"​","style":"IPY_MODEL_9fbe8004c5634dbc948591f3de7d429d","value":" 1.91M/1.91M [00:00<00:00, 13.1MB/s]"}},"0fc48fa45e1c4ed49c6f8cf3bab33082":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7dc506a8ff1b48cc96f900cc43d35058":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"15024f1358fd43bf90682eb0ca974dde":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.
0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4156aedbf87148c399f7c7f0630da339":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"79874c934812433b975b71930b3d62a0":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"8830acbf645a419bb733c1c61b9cfdd9":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9fbe8004c5634dbc948591f3de7d429d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"71d0f3a66bd94db3a60d9723fe8babb2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c8b475d4ee0f4afd9d447c50a0ca17bd","IPY_MODEL_25a3d1
32c3844f29b0d02f599ee1480e","IPY_MODEL_39f26ffe6dd94088abe9528d85761fbd"],"layout":"IPY_MODEL_86188a25a52b4e828fecae59fb2f7c27"}},"c8b475d4ee0f4afd9d447c50a0ca17bd":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a249d15025d44ba2aa15a5906fbb23bc","placeholder":"​","style":"IPY_MODEL_e84973e59e8d43bf94967ad48227170a","value":"Downloading (…)cial_tokens_map.json: 100%"}},"25a3d132c3844f29b0d02f599ee1480e":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_89556ac0da0e43a8b24c20db86a60cf3","max":65,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d40a2a13713a40ef99d2d10679bbd760","value":65}},"39f26ffe6dd94088abe9528d85761fbd":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e05fcb1f7b2d46e28882bf87b699b86b","placeholder":"​","style":"IPY_MODEL_f6f1f482271a4519bd90664801a09a53","value":" 65.0/65.0 [00:00<00:00, 
1.10kB/s]"}},"86188a25a52b4e828fecae59fb2f7c27":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a249d15025d44ba2aa15a5906fbb23bc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e84973e59e8d43bf94967ad48227170a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"89556ac0da0e43a8b24c20db86a60cf3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d40a2a13713a40ef99
d2d10679bbd760":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e05fcb1f7b2d46e28882bf87b699b86b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f6f1f482271a4519bd90664801a09a53":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5904b340bf554266b0865c53cb01996b":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_3a5a98c8f49d45ba84f92bdcd11aec67","IPY_MODEL_fb82f2a286ba44c2a735308b90b7b7d5","IPY_MODEL_b4d6ea0cce674ccea9e7de67fb1afe74"],"layout":"IPY_MODEL_9ca671403cd34acba98544181b561b80"}},"3a5a98c8f49d45ba84f92bdcd11aec67":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b1c1143ac129402da6e459d5535e92d3","placeholder":"​","style":"IPY_MODEL_ff457009d90143058537737775aab2f1","value":"Downloading (…)okenizer_config.json: 
100%"}},"fb82f2a286ba44c2a735308b90b7b7d5":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_05bfc9eb1cd747ee82788799fb85d600","max":87,"min":0,"orientation":"horizontal","style":"IPY_MODEL_22b19fafbf064fac93a578df05f5b987","value":87}},"b4d6ea0cce674ccea9e7de67fb1afe74":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7fd96fb4c2f84e109a67ed11dad67dbb","placeholder":"​","style":"IPY_MODEL_d6c564c4fe8f485abe34f87d553cd23b","value":" 87.0/87.0 [00:00<00:00, 1.67kB/s]"}},"9ca671403cd34acba98544181b561b80":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b1c1143ac129402da6e459d5535e92d3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ff457009d90143058537737775aab2f1":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_n
ame":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"05bfc9eb1cd747ee82788799fb85d600":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"22b19fafbf064fac93a578df05f5b987":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"7fd96fb4c2f84e109a67ed11dad67dbb":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d6c564c4fe8f485abe34f87d553cd23b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"15e2af9148b349a3a6414558afae0f08":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_d98f32aafbdf437eaf01a860409f00bb","IPY_MODEL_c274c9c08b6a4b2682
08c7545fae9bb4","IPY_MODEL_950857d4893c40b7badee2256c68388d"],"layout":"IPY_MODEL_51bbc4248b6b4af4ade943406d7bc9ae"}},"d98f32aafbdf437eaf01a860409f00bb":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_478e53ce205c466e9c58bbbac2f478c6","placeholder":"​","style":"IPY_MODEL_025215170031452db3c27ee17fd8d8b3","value":"Downloading (…)lve/main/config.json: 100%"}},"c274c9c08b6a4b268208c7545fae9bb4":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_85a70a8a5e524dc6a3042f9e10463063","max":1392,"min":0,"orientation":"horizontal","style":"IPY_MODEL_239c0b6bcf434f23a155b0fb080d7e3f","value":1392}},"950857d4893c40b7badee2256c68388d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2142d4b28cd8463093af7c93f5c00765","placeholder":"​","style":"IPY_MODEL_eb02b58bb8d04f8ca33aa36d85d37b84","value":" 1.39k/1.39k [00:00<00:00, 
22.3kB/s]"}},"51bbc4248b6b4af4ade943406d7bc9ae":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"478e53ce205c466e9c58bbbac2f478c6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"025215170031452db3c27ee17fd8d8b3":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"85a70a8a5e524dc6a3042f9e10463063":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"239c0b6bcf434f23a1
55b0fb080d7e3f":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"2142d4b28cd8463093af7c93f5c00765":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eb02b58bb8d04f8ca33aa36d85d37b84":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"77a3084337a14f00ba87dc4c5079c559":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_26c8e8fefbb04b84a25e0b3c902680c6","IPY_MODEL_eccfecf0fb0048478628118192507ac6","IPY_MODEL_0d91578893c948cca3e4db852a7cba96"],"layout":"IPY_MODEL_307ff80ed3db4a7399a679d6911f2527"}},"26c8e8fefbb04b84a25e0b3c902680c6":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7ca080d3a19b4a57aa7e71e61feab662","placeholder":"​","style":"IPY_MODEL_d1c86b3d165042c493358d985dd126f6","value":"Downloading pytorch_model.bin: 
100%"}},"eccfecf0fb0048478628118192507ac6":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_c879ab2cd76541338e8f9b75e125605c","max":2275329241,"min":0,"orientation":"horizontal","style":"IPY_MODEL_0c971cdad12742609e0e3ec90463407c","value":2275329241}},"0d91578893c948cca3e4db852a7cba96":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e1b567950e9c45158fe788bd722c5abf","placeholder":"​","style":"IPY_MODEL_f045b0d4026c404cb38d08b654b231b2","value":" 2.28G/2.28G [00:26<00:00, 102MB/s]"}},"307ff80ed3db4a7399a679d6911f2527":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7ca080d3a19b4a57aa7e71e61feab662":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d1c86b3d165042c493358d985dd126f6":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":
"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c879ab2cd76541338e8f9b75e125605c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0c971cdad12742609e0e3ec90463407c":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e1b567950e9c45158fe788bd722c5abf":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f045b0d4026c404cb38d08b654b231b2":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8c6b36704ab94036b7f5bbe9bfbeed24":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_5257d4a4d112447bb4c3875b02053af1","IPY_MODEL_5
0a26dae74ed4e6392eda4c03f1ab92f","IPY_MODEL_407012ca4c204b7aa9685ea034d9dbeb"],"layout":"IPY_MODEL_3a267301185f49278c91ce986f957b62"}},"5257d4a4d112447bb4c3875b02053af1":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_cd7ed6aa7ade44379b8cbc4fa6d119b3","placeholder":"​","style":"IPY_MODEL_83102867fa154abf9b92718d1d977f4b","value":"Downloading (…)neration_config.json: 100%"}},"50a26dae74ed4e6392eda4c03f1ab92f":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_82581858a00c416d9f430e4c36866710","max":259,"min":0,"orientation":"horizontal","style":"IPY_MODEL_128e00463c414f13978f08ac3302da30","value":259}},"407012ca4c204b7aa9685ea034d9dbeb":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2e970f8d09994940b89c9bb1a2904c0f","placeholder":"​","style":"IPY_MODEL_b4b531701ae54438a50047327152c97b","value":" 259/259 [00:00<00:00, 
5.14kB/s]"}},"3a267301185f49278c91ce986f957b62":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cd7ed6aa7ade44379b8cbc4fa6d119b3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"83102867fa154abf9b92718d1d977f4b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"82581858a00c416d9f430e4c36866710":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"128e00463c414f1397
"cells":[{"cell_type":"markdown","source":["Text summary based on a pretrained transformer model: here we will not train the model, but use it directly for text summarization."],"metadata":{"id":"SzLFF2BRYYlh"}},{"cell_type":"code","execution_count":1,"metadata":{"id":"u65lygxzXId7","executionInfo":{"status":"ok","timestamp":1693649965466,"user_tz":-330,"elapsed":16611,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"outputs":[],"source":["# import libraries\n","# !pip install transformers\n","# !pip install SentencePiece\n","from transformers import PegasusForConditionalGeneration, PegasusTokenizer, pipeline"]},{"cell_type":"markdown","source":["Each model has its own tokenizer and generation model class, so use them directly as imported above.\n","\n","Now we will proceed step by step.\n","\n","The first step is to load the pretrained tokenizer for the model."],"metadata":{"id":"EYcXhYHnYzMw"}},{"cell_type":"code","source":["model_name = 'google/pegasus-xsum'\n","pegasus_tokenizer = PegasusTokenizer.from_pretrained(model_name) # tokenizer of 
model"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["395c9e72179f4343bddf71e6c44a4c79","031e221b64d9422db4828586cf94ef0f","fb15d8746d7a44c089c1ea8f5d53f3fa","fce9fc3db0a5438eb65cd739ab87f806","0fc48fa45e1c4ed49c6f8cf3bab33082","7dc506a8ff1b48cc96f900cc43d35058","15024f1358fd43bf90682eb0ca974dde","4156aedbf87148c399f7c7f0630da339","79874c934812433b975b71930b3d62a0","8830acbf645a419bb733c1c61b9cfdd9","9fbe8004c5634dbc948591f3de7d429d","71d0f3a66bd94db3a60d9723fe8babb2","c8b475d4ee0f4afd9d447c50a0ca17bd","25a3d132c3844f29b0d02f599ee1480e","39f26ffe6dd94088abe9528d85761fbd","86188a25a52b4e828fecae59fb2f7c27","a249d15025d44ba2aa15a5906fbb23bc","e84973e59e8d43bf94967ad48227170a","89556ac0da0e43a8b24c20db86a60cf3","d40a2a13713a40ef99d2d10679bbd760","e05fcb1f7b2d46e28882bf87b699b86b","f6f1f482271a4519bd90664801a09a53","5904b340bf554266b0865c53cb01996b","3a5a98c8f49d45ba84f92bdcd11aec67","fb82f2a286ba44c2a735308b90b7b7d5","b4d6ea0cce674ccea9e7de67fb1afe74","9ca671403cd34acba98544181b561b80","b1c1143ac129402da6e459d5535e92d3","ff457009d90143058537737775aab2f1","05bfc9eb1cd747ee82788799fb85d600","22b19fafbf064fac93a578df05f5b987","7fd96fb4c2f84e109a67ed11dad67dbb","d6c564c4fe8f485abe34f87d553cd23b","15e2af9148b349a3a6414558afae0f08","d98f32aafbdf437eaf01a860409f00bb","c274c9c08b6a4b268208c7545fae9bb4","950857d4893c40b7badee2256c68388d","51bbc4248b6b4af4ade943406d7bc9ae","478e53ce205c466e9c58bbbac2f478c6","025215170031452db3c27ee17fd8d8b3","85a70a8a5e524dc6a3042f9e10463063","239c0b6bcf434f23a155b0fb080d7e3f","2142d4b28cd8463093af7c93f5c00765","eb02b58bb8d04f8ca33aa36d85d37b84"]},"id":"hWnooZmlYx1s","executionInfo":{"status":"ok","timestamp":1693649966933,"user_tz":-330,"elapsed":1472,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"79eefa14-b130-4890-dbe1-677754e9a63d"},"execution_count":2,"outputs":[{"output_type":"display_data","data":{"text/plain":["Downloading (…)ve/main/spiece.model: 0%| | 0.00/1.91M [00:00. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=True`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n","/usr/local/lib/python3.10/dist-packages/transformers/convert_slow_tokenizer.py:470: UserWarning: The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. 
In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.\n"," warnings.warn(\n"]}]},{"cell_type":"markdown","source":["Cool, now we have tokenizer to convert text to tokens or in our case reviews to tokens"],"metadata":{"id":"b0gq8qKz2My1"}},{"cell_type":"code","source":["def preprocess_function(data):\n"," \"\"\"This function is used to fix things tokenizer like max input length that can be given,\n"," maximum target length if maximum size exceed the length then truncate that data and lastly\n"," we add labels to model inputs data\n"," \"\"\"\n"," max_input_length = 512 #\n"," max_target_length = 30\n"," model_inputs = tokenizer(data['review_body'], max_length=max_input_length, truncation=True)\n"," labels = tokenizer(data['review_title'], max_length=max_target_length, truncation=True)\n"," model_inputs['labels'] = labels['input_ids']\n"," return model_inputs\n"],"metadata":{"id":"oR2PrKMF1fSI","executionInfo":{"status":"ok","timestamp":1693659930226,"user_tz":-330,"elapsed":5,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":13,"outputs":[]},{"cell_type":"code","source":["tokenize_data = filter_data.map(preprocess_function, batched=True) # here map isfunction that on datasets"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":49,"referenced_widgets":["07e5f6a95c45455497d04132049b89b6","6ef9278222074d11a6d34c658138c136","7ac9a6ed93064df9a1361e58abbc7d21","02ccddf089e041afba8bc70382642b78","603e5c683f93457c990e30beb17777e5","8fbba0f0aaf2417092e09c1aeeb86155","03cf7424531f4a9e8c15df29fe324f45","23de80321ec44aa999196070216c40f1","d634b16e516b4198af6294e7b15e6cb5","345f461579684574b3eff98afe1c80d0","8791b1a0b3054100bea2b6d27c920d7f"]},"id":"aYAGwUS03XLB","executionInfo":{"status":"ok","timestamp":1693659930226,"user_tz":-330,"elapsed":5,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"bfadc839-5e79-48fd-9e5d-b73988575b41"},"execution_count":14,"outputs":[{"output_type":"display_data","data":{"text/plain":["Map: 0%| | 0/1 [00:00"],"text/html":["\n","
Training progress: [80/80 14:12, Epoch 8/8]

Epoch | Training Loss | Validation Loss | Rouge1   | Rouge2   | Rougel   | Rougelsum
1     | 20.867100     | 14.306795       | 0.000000 | 0.000000 | 0.000000 | 0.000000
2     | 22.805800     | 16.217054       | 0.000000 | 0.000000 | 0.000000 | 0.000000
3     | 21.997500     | 15.378480       | 0.000000 | 0.000000 | 0.000000 | 0.000000
4     | 20.580900     | 14.806763       | 0.000000 | 0.000000 | 0.000000 | 0.000000
5     | 18.926700     | 14.403424       | 0.000000 | 0.000000 | 0.000000 | 0.000000
6     | 20.218700     | 14.803487       | 0.000000 | 0.000000 | 0.000000 | 0.000000
7     | 19.901300     | 14.893433       | 0.000000 | 0.000000 | 0.000000 | 0.000000
8     | 18.977500     | 15.239052       | 0.000000 | 0.000000 | 0.000000 | 0.000000
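The Rouge1/Rouge2/Rougel/Rougelsum columns in the table above come from a metrics function handed to the Hugging Face trainer; that cell is not visible in this excerpt, so the snippet below is only a typical sketch of such a compute_metrics, assuming the evaluate library (pip install evaluate rouge_score) and the google/pegasus-xsum tokenizer. The function and variable names are illustrative, not taken from the repository.

# Illustrative sketch only (not a cell from this notebook): a ROUGE metric function
# of the kind commonly passed to a Hugging Face Seq2SeqTrainer via compute_metrics=...
# Assumes: pip install evaluate rouge_score, plus the Pegasus tokenizer used above.
import numpy as np
import evaluate
from transformers import PegasusTokenizer

pegasus_tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
rouge = evaluate.load("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    # Labels use -100 for ignored positions; swap them for the pad token before decoding.
    labels = np.where(labels != -100, labels, pegasus_tokenizer.pad_token_id)
    decoded_preds = pegasus_tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = pegasus_tokenizer.batch_decode(labels, skip_special_tokens=True)
    # Recent versions of evaluate return rouge1/rouge2/rougeL/rougeLsum as plain floats.
    scores = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    return {name: round(value, 6) for name, value in scores.items()}

ROUGE scores of exactly 0.000000, as in every epoch above, simply mean that the generated summaries and the reference titles shared no overlapping n-grams on the (very small) evaluation split.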

"]},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["TrainOutput(global_step=80, training_loss=20.290340995788576, metrics={'train_runtime': 863.2464, 'train_samples_per_second': 0.695, 'train_steps_per_second': 0.093, 'total_flos': 93201427445760.0, 'train_loss': 20.290340995788576, 'epoch': 8.0})"]},"metadata":{},"execution_count":36}]},{"cell_type":"code","source":["trainer.evaluate()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":193},"id":"wPdRUQo0CfPI","executionInfo":{"status":"ok","timestamp":1693662282259,"user_tz":-330,"elapsed":1318,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"fedfa7d3-22e1-48a2-a437-764d5f71780b"},"execution_count":37,"outputs":[{"output_type":"display_data","data":{"text/plain":[""],"text/html":["\n","

Evaluation progress: [1/1]
\n"," "]},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["{'eval_loss': 15.239051818847656,\n"," 'eval_rouge1': 0.0,\n"," 'eval_rouge2': 0.0,\n"," 'eval_rougeL': 0.0,\n"," 'eval_rougeLsum': 0.0,\n"," 'eval_runtime': 0.6349,\n"," 'eval_samples_per_second': 1.575,\n"," 'eval_steps_per_second': 1.575,\n"," 'epoch': 8.0}"]},"metadata":{},"execution_count":37}]},{"cell_type":"markdown","source":["We got a good accracy on a small dataset.\n","Great!"],"metadata":{"id":"eQg8gSRzI_9m"}},{"cell_type":"code","source":[],"metadata":{"id":"lyE84pPYI3n5"},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /notebooks/Text summary using python.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyOzfZBSpkPL4BUs0taZZoxU"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# Text Summary:\n","Now we have a very big text files and no one have time to read that all and they want it everything there summary to know the full context.\n","\n","So here i will be creating it a simple ranking based python code."],"metadata":{"id":"eXakV_4QJ7sC"}},{"cell_type":"code","source":["# imported library\n","import nltk\n","from nltk.corpus import stopwords\n","from nltk.tokenize import word_tokenize, sent_tokenize\n","nltk.download('stopwords')\n","nltk.download('punkt')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"spCCsvW5Kawu","executionInfo":{"status":"ok","timestamp":1693722396340,"user_tz":-330,"elapsed":578,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"6b7e6a73-18f0-423e-e94f-fbfb1a0302b9"},"execution_count":11,"outputs":[{"output_type":"stream","name":"stderr","text":["[nltk_data] Downloading package stopwords to /root/nltk_data...\n","[nltk_data] Package stopwords is already up-to-date!\n","[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data] Package punkt is already up-to-date!\n"]},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{},"execution_count":11}]},{"cell_type":"code","source":["# text you can change based on your requirement:\n","text = \"\"\"\n","Artificial Intelligence (AI) is a multidisciplinary field of computer science and engineering that seeks to create intelligent systems capable of mimicking human cognitive functions. At its core, AI is the science of developing algorithms, models, and systems that can perform tasks that typically require human intelligence. These tasks encompass a wide range of activities, from problem-solving and decision-making to understanding natural language, recognizing patterns, and learning from data.\n","\n","One of the foundational principles of AI is the concept of machine learning, wherein AI systems can improve their performance on a specific task through exposure to data and experience, rather than relying solely on explicit programming. Machine learning algorithms enable AI systems to identify patterns, make predictions, and adapt to changing conditions, making them incredibly versatile and applicable across various domains.\n","\n","AI can be categorized into two broad types:\n","\n","Narrow or Weak AI: This type of AI is designed to excel at specific tasks or domains. 
Examples include voice assistants like Siri or recommendation systems on streaming platforms. While they may seem intelligent within their specialized scope, they lack general intelligence and self-awareness.\n","\n","General or Strong AI: This represents the aspiration of creating AI systems with human-level intelligence and cognitive abilities. General AI would possess the capacity to understand, learn, and adapt across a wide range of tasks and contexts, similar to human intelligence. Achieving true general AI remains a long-term goal and a subject of ongoing research.\n","\n","AI technologies encompass a diverse set of techniques and approaches, including neural networks, natural language processing, computer vision, robotics, and reinforcement learning, among others. These tools find applications across numerous sectors, such as healthcare (diagnosis and treatment optimization), finance (algorithmic trading and fraud detection), transportation (autonomous vehicles), and entertainment (video game AI and content recommendation).\n","\n","While AI holds immense promise, it also raises ethical and societal questions related to privacy, bias, transparency, and the potential impact on the workforce. As AI continues to advance, it is crucial to ensure responsible development and deployment, guided by ethical principles and a commitment to harnessing its potential for the betterment of society. In essence, AI represents a powerful and transformative force in the modern world, with the potential to reshape industries, improve efficiency, and enhance our understanding of intelligence and cognition.\n","\"\"\""],"metadata":{"id":"stmBS9TYK4aI","executionInfo":{"status":"ok","timestamp":1693722419036,"user_tz":-330,"elapsed":567,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":12,"outputs":[]},{"cell_type":"markdown","source":["### here i am taking list of all the stop words and use them to remove the stop words from the list so that we will not mark the sentence rank based on the stopwords like a, is, the etc."],"metadata":{"id":"C39HWcKWLzeV"}},{"cell_type":"code","source":["stop_words = set(stopwords.words('english'))"],"metadata":{"id":"l8UDM-LmLlyW","executionInfo":{"status":"ok","timestamp":1693722447288,"user_tz":-330,"elapsed":598,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":13,"outputs":[]},{"cell_type":"markdown","source":["#### Now i got the list of all the stopwords in english and next convert the above text to tokens using **word_tokenize**"],"metadata":{"id":"97lGqT4_MO9N"}},{"cell_type":"code","source":["text = text.lower() # converting text to lower so that it will take the and The words same same for other words"],"metadata":{"id":"ahk8T9syM9YS","executionInfo":{"status":"ok","timestamp":1693722475835,"user_tz":-330,"elapsed":594,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":14,"outputs":[]},{"cell_type":"code","source":["words = word_tokenize(text)"],"metadata":{"id":"692d1Eb3MFtD","executionInfo":{"status":"ok","timestamp":1693722491620,"user_tz":-330,"elapsed":599,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}}},"execution_count":15,"outputs":[]},{"cell_type":"markdown","source":["##### Create frequency table to keep the score of each word"],"metadata":{"id":"aeMcgkMeMwRq"}},{"cell_type":"code","source":["word_freq = {} # freq count of words (how many times they came in the text)\n","for word in words:\n"," 
if word in stop_words:\n"," continue\n"," if word in word_freq:\n"," word_freq[word] += 1 # increase if word came again\n"," else:\n"," word_freq[word] = 1 # if first time add to dict"],"metadata":{"id":"KRbMvRQtMmBq"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["##### Creating sentence ranking based on word freq to mark sentence and take out top rank sentence from the full text as summary"],"metadata":{"id":"7Kr9Nn-XOLJK"}},{"cell_type":"code","source":["sentences = sent_tokenize(text) # to create sentence from text\n","sentence_ranking = {}\n","for sentence in sentences:\n"," for word, freq in word_freq.items():\n"," if word in sentence:\n"," if sentence in sentence_ranking: #increase rank if word in sentence by word frequency\n"," sentence_ranking[sentence] += freq\n"," else:\n"," sentence_ranking[sentence] = freq"],"metadata":{"id":"dTuSL30KNVx3"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["##### Now core part is completed, we will calculate avg of sentence ranking and once we have it then use to remove sentence that rank below the avg and sometimes 20% more on avg to get more good result"],"metadata":{"id":"5zYJDjvDPUBv"}},{"cell_type":"code","source":["rank_sum = 0\n","for sentence in sentence_ranking:\n"," rank_sum += sentence_ranking[sentence]\n","print(f'Rank sum: {rank_sum}')\n","avg = int(rank_sum / len(sentence_ranking))\n","print(f'Avg: {avg}')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"zla0_GwRPP2y","executionInfo":{"status":"ok","timestamp":1693721625005,"user_tz":-330,"elapsed":595,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"8f666be2-7b7f-4513-d831-0d64bf54a1c2"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Rank sum: 1299\n","Avg: 81\n"]}]},{"cell_type":"markdown","source":["Now lets take out all sentence that are at high rank and add to summary"],"metadata":{"id":"6zIQuKMDQVpq"}},{"cell_type":"code","source":["avg* 1.2"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"xXiCeFJcrZ9_","executionInfo":{"status":"ok","timestamp":1693721672958,"user_tz":-330,"elapsed":4,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"e5003a43-bb8a-4cdc-a580-e97f473f5e35"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["97.2"]},"metadata":{},"execution_count":9}]},{"cell_type":"code","source":["summary = ''\n","for sentence in sentences:\n"," if sentence_ranking[sentence] > (1.2*avg):\n"," summary += ' '+ sentence\n","print(summary)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"liyAxYt5QD2X","executionInfo":{"status":"ok","timestamp":1693721683258,"user_tz":-330,"elapsed":603,"user":{"displayName":"shubham mandowara","userId":"01086830669491528691"}},"outputId":"ca5fb23d-b500-403a-a4ec-2a88037a0457"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":[" at its core, ai is the science of developing algorithms, models, and systems that can perform tasks that typically require human intelligence. one of the foundational principles of ai is the concept of machine learning, wherein ai systems can improve their performance on a specific task through exposure to data and experience, rather than relying solely on explicit programming. 
machine learning algorithms enable ai systems to identify patterns, make predictions, and adapt to changing conditions, making them incredibly versatile and applicable across various domains. general ai would possess the capacity to understand, learn, and adapt across a wide range of tasks and contexts, similar to human intelligence. these tools find applications across numerous sectors, such as healthcare (diagnosis and treatment optimization), finance (algorithmic trading and fraud detection), transportation (autonomous vehicles), and entertainment (video game ai and content recommendation).\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"OXty_m3GQx9O"},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /python_algo.py: -------------------------------------------------------------------------------- 1 | # imported library 2 | import nltk 3 | from nltk.corpus import stopwords 4 | from nltk.tokenize import word_tokenize, sent_tokenize 5 | from helper.helper import convert_text_to_lowercase 6 | 7 | nltk.download("stopwords") 8 | nltk.download("punkt") 9 | stop_words = set(stopwords.words("english")) 10 | 11 | 12 | def main(text, sentence_on_output) -> str: 13 | """Main function to summarize the text using python based rank and frequency count of words 14 | Arguments: 15 | text (str): text to summarize 16 | sentence_on_output (int): Number of sentences on output 17 | Returns: 18 | str: Summary of text 19 | """ 20 | text = convert_text_to_lowercase(text=text) 21 | words = word_tokenize(text) 22 | word_freq = {} # freq count of words (how many times they came in the text) 23 | for word in words: 24 | if word in stop_words: 25 | continue 26 | if word in word_freq: 27 | word_freq[word] += 1 # increase if word came again 28 | else: 29 | word_freq[word] = 1 # if first time add to dict 30 | 31 | sentences = sent_tokenize(text) # to create sentence from text 32 | sentence_ranking = {} 33 | for sentence in sentences: 34 | for word, freq in word_freq.items(): 35 | if word in sentence: 36 | if ( 37 | sentence in sentence_ranking 38 | ): # increase rank if word in sentence by word frequency 39 | sentence_ranking[sentence] += freq 40 | else: 41 | sentence_ranking[sentence] = freq 42 | 43 | rank_sum = 0 44 | for sentence in sentence_ranking: 45 | rank_sum += sentence_ranking[sentence] 46 | avg = int(rank_sum / len(sentence_ranking)) 47 | summary = "" 48 | count = 0 49 | for sentence in sentences: 50 | if sentence_ranking[sentence] > (1.2 * avg): 51 | summary += " " + sentence 52 | count += 1 53 | if count == sentence_on_output: 54 | return summary 55 | return summary 56 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nltk==3.8.1 2 | streamlit==1.26.0 3 | sumy==0.11.0 4 | # transformers==4.32.1 5 | # sentencepiece == 0.1.99 6 | # torch == 2.0.1 7 | -------------------------------------------------------------------------------- /streamlit.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from python_algo import main as python_main 3 | from sumy_lib_based_summary import main as sumy_main 4 | # from transformers_based_summary import main as transformers_main 5 | 6 | if __name__ == "__main__": 7 | st.runtime.legacy_caching.clear_cache() 8 | st.set_page_config(layout="wide") 9 | st.title("Text Summarization ✍️") 10 | 
st.sidebar.header( 11 | "It's time to master Summarization using a variety of models!" 12 | ) 13 | st.sidebar.subheader("Select a model from the list 👇🏻:") 14 | model_selection = st.sidebar.selectbox( 15 | "👇🏻", 16 | [ 17 | "", 18 | "Core Python algo(Frequency and Ranking based)", 19 | "Lex Rank: From Python lib sumy", 20 | "LSA: From Python lib sumy", 21 | "Text Rank: From Python lib sumy", 22 | # "Transformers: Model google/pegasus-xsum" 23 | ], 24 | ) 25 | 26 | st.sidebar.write("----") 27 | st.sidebar.subheader("About this app:") 28 | st.sidebar.write( 29 | "Designed by **Shubham Mandowara** to showcase text summarization using various techniques. You can select the model, and add text to summarize." 30 | ) 31 | st.sidebar.write("I hope this is helpful. Please feel free to contact me if you have any queries." 32 | ) 33 | st.sidebar.write("----") 34 | st.sidebar.subheader( 35 | "🚀 Follow me for the latest insights on AI, ML, DL, Generative AI, Deployment, and MLOps! Stay ahead of the curve. 📊🤖 #AI #MachineLearning #DeepLearning #Tech" 36 | ) 37 | 38 | with st.sidebar: 39 | column1, column2 = st.columns(2) 40 | column1.markdown( 41 | "[![Linkedin](https://img.icons8.com/material-outlined/48/000000/linkedin.png)](https://www.linkedin.com/in/shubhammandowara/)" 42 | ) 43 | column2.markdown( 44 | "[![Github](https://img.icons8.com/material-outlined/48/000000/github.png)](https://github.com/ShubhamMandowara)" 45 | ) 46 | 47 | st.info( 48 | """**Ctrl + Enter** to get summary OR click anywhere outside the input box after entering text \n 49 | **Select a model from the left sidebar list**""", 50 | icon="ℹ️", 51 | ) 52 | st.write("Youtube : [Dive into the code's secrets in a captivating video journey!](https://youtu.be/s3WQW1Cd8eo?si=nU1dEhFnylqBf345)") 53 | 54 | text_to_summarize = st.text_area("Enter your text to summarize:") 55 | no_of_sentence_on_output = st.number_input( 56 | "No. 
of sentences on output you want", min_value=2, max_value=100 57 | ) 58 | st.write("Selected Model:- ", model_selection) 59 | summary = st.write("Summary:") 60 | if ( 61 | model_selection != "" 62 | and text_to_summarize != "" 63 | and no_of_sentence_on_output != None 64 | ): 65 | if model_selection == "Core Python algo(Frequency and Ranking based)": 66 | st.write( 67 | python_main( 68 | text=text_to_summarize, sentence_on_output=no_of_sentence_on_output 69 | ) 70 | ) 71 | elif model_selection == "Lex Rank: From Python lib sumy": 72 | st.write( 73 | sumy_main( 74 | text=text_to_summarize, 75 | model_name="Lex Rank", 76 | sentence_on_output=no_of_sentence_on_output, 77 | ) 78 | ) 79 | elif model_selection == "LSA: From Python lib sumy": 80 | st.write( 81 | sumy_main( 82 | text=text_to_summarize, 83 | model_name="LSA", 84 | sentence_on_output=no_of_sentence_on_output, 85 | ) 86 | ) 87 | elif model_selection == "Text Rank: From Python lib sumy": 88 | st.write( 89 | sumy_main( 90 | text=text_to_summarize, 91 | model_name="Text Rank", 92 | sentence_on_output=no_of_sentence_on_output, 93 | ) 94 | ) 95 | # elif model_selection == "Transformers: Model google/pegasus-xsum": 96 | # st.write(transformers_main(text=text_to_summarize)) 97 | else: 98 | st.write("Not entering into any condition") 99 | -------------------------------------------------------------------------------- /sumy_lib_based_summary.py: -------------------------------------------------------------------------------- 1 | from sumy.summarizers.lex_rank import LexRankSummarizer 2 | from sumy.summarizers.lsa import LsaSummarizer 3 | from sumy.parsers.plaintext import PlaintextParser 4 | from sumy.summarizers.text_rank import TextRankSummarizer 5 | from sumy.nlp.tokenizers import Tokenizer 6 | from helper.helper import convert_text_to_lowercase 7 | import nltk 8 | from typing import List, Tuple 9 | 10 | nltk.download("punkt") 11 | 12 | 13 | def common_process(text: str) -> str: 14 | """Function to convert text to lowercase and convert text to tokens using parser 15 | Arguments: 16 | text (str): text to convert 17 | Returns: 18 | tokens: token parser 19 | """ 20 | text = convert_text_to_lowercase(text) 21 | parser = PlaintextParser.from_string(text, Tokenizer("english")) 22 | return parser 23 | 24 | 25 | def common_return_process(text: Tuple) -> str: 26 | """Function to join the text sentence 27 | Arguments: 28 | text (str): text to join 29 | Returns: 30 | str: summary 31 | """ 32 | joined_sentence = "" 33 | for t1 in text: 34 | joined_sentence += " " + str(t1) 35 | return joined_sentence 36 | 37 | 38 | def main(text: str, model_name: str, sentence_on_output: int = 2) -> str: 39 | """Main function to summarize the text using sumy lib 40 | Arguments: 41 | text (str): text to summarize 42 | model_name (str): name of the model 43 | sentence_on_output (int): Number of sentences on output 44 | Returns: 45 | str: Summary of text 46 | """ 47 | summarizer = None 48 | if model_name == "Lex Rank": 49 | summarizer = LexRankSummarizer() 50 | elif model_name == "LSA": 51 | summarizer = LsaSummarizer() 52 | elif model_name == "Text Rank": 53 | summarizer = TextRankSummarizer() 54 | else: 55 | raise "Wrong model name / Model name is not defined" 56 | parser = common_process(text=text) 57 | summary = summarizer(parser.document, sentence_on_output) 58 | return_text = common_return_process(text=summary) 59 | return return_text 60 | -------------------------------------------------------------------------------- /transformers_based_summary.py: 
-------------------------------------------------------------------------------- 1 | # from transformers import PegasusForConditionalGeneration, PegasusTokenizer, pipeline 2 | 3 | # model_name = 'google/pegasus-xsum' 4 | # pegasus_tokenizer = PegasusTokenizer.from_pretrained(model_name) # tokenizer of model 5 | # pegasus_model = PegasusForConditionalGeneration.from_pretrained(model_name) 6 | 7 | # def main(text): 8 | # summarizer = pipeline( 9 | # "summarization", 10 | # model=model_name, 11 | # tokenizer=pegasus_tokenizer, 12 | # framework='pt', 13 | # ) 14 | # summary = summarizer(text, min_length=30, max_length=150) 15 | # return summary[0] 16 | --------------------------------------------------------------------------------
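Note that transformers_based_summary.py ships fully commented out, in line with the commented transformers/sentencepiece/torch pins in requirements.txt. Purely for reference, here is a minimal sketch of the same Pegasus pipeline flow in enabled form, assuming those packages are installed; it reuses the already loaded model instead of passing the model name (which would load the weights twice) and unpacks the pipeline's list-of-dicts return value. The demo text under __main__ is illustrative only.

# Minimal sketch of the commented-out transformers_based_summary.py flow.
# Assumes transformers, sentencepiece and torch are installed (they are commented
# out in requirements.txt above). The demo text in __main__ is illustrative only.
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, pipeline

model_name = "google/pegasus-xsum"
pegasus_tokenizer = PegasusTokenizer.from_pretrained(model_name)  # tokenizer of the model
pegasus_model = PegasusForConditionalGeneration.from_pretrained(model_name)


def main(text: str) -> str:
    """Summarize text with the pretrained Pegasus model via the summarization pipeline."""
    summarizer = pipeline(
        "summarization",
        model=pegasus_model,        # reuse the loaded model instead of reloading by name
        tokenizer=pegasus_tokenizer,
        framework="pt",
    )
    summary = summarizer(text, min_length=30, max_length=150)
    return summary[0]["summary_text"]  # the pipeline returns a list of dicts


if __name__ == "__main__":
    print(main("Artificial Intelligence is a multidisciplinary field of computer science "
               "and engineering that seeks to create intelligent systems."))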