├── .gitignore
├── LICENSE
├── README.md
├── analyzer.py
├── app.py
├── references.py
├── requirements.txt
├── static
└── css
│ └── styles.css
└── templates
├── index.html
└── response.html
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | flask_session/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | .pybuilder/
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | # For a library or package, you might want to ignore these files since the code is
88 | # intended to run in multiple environments; otherwise, check them in:
89 | # .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # poetry
99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | # This is especially recommended for binary packages to ensure reproducibility, and is more
101 | # commonly ignored for libraries.
102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 |
105 | # pdm
106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | # in version control.
110 | # https://pdm.fming.dev/#use-with-ide
111 | .pdm.toml
112 |
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 |
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 |
120 | # SageMath parsed files
121 | *.sage.py
122 |
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 |
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 |
136 | # Rope project settings
137 | .ropeproject
138 |
139 | # mkdocs documentation
140 | /site
141 |
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 |
147 | # Pyre type checker
148 | .pyre/
149 |
150 | # pytype static type analyzer
151 | .pytype/
152 |
153 | # Cython debug symbols
154 | cython_debug/
155 |
156 | # PyCharm
157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | # and can be added to the global gitignore or merged into this file. For a more nuclear
160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Creative Commons Legal Code
2 |
3 | CC0 1.0 Universal
4 |
5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
12 | HEREUNDER.
13 |
14 | Statement of Purpose
15 |
16 | The laws of most jurisdictions throughout the world automatically confer
17 | exclusive Copyright and Related Rights (defined below) upon the creator
18 | and subsequent owner(s) (each and all, an "owner") of an original work of
19 | authorship and/or a database (each, a "Work").
20 |
21 | Certain owners wish to permanently relinquish those rights to a Work for
22 | the purpose of contributing to a commons of creative, cultural and
23 | scientific works ("Commons") that the public can reliably and without fear
24 | of later claims of infringement build upon, modify, incorporate in other
25 | works, reuse and redistribute as freely as possible in any form whatsoever
26 | and for any purposes, including without limitation commercial purposes.
27 | These owners may contribute to the Commons to promote the ideal of a free
28 | culture and the further production of creative, cultural and scientific
29 | works, or to gain reputation or greater distribution for their Work in
30 | part through the use and efforts of others.
31 |
32 | For these and/or other purposes and motivations, and without any
33 | expectation of additional consideration or compensation, the person
34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she
35 | is an owner of Copyright and Related Rights in the Work, voluntarily
36 | elects to apply CC0 to the Work and publicly distribute the Work under its
37 | terms, with knowledge of his or her Copyright and Related Rights in the
38 | Work and the meaning and intended legal effect of CC0 on those rights.
39 |
40 | 1. Copyright and Related Rights. A Work made available under CC0 may be
41 | protected by copyright and related or neighboring rights ("Copyright and
42 | Related Rights"). Copyright and Related Rights include, but are not
43 | limited to, the following:
44 |
45 | i. the right to reproduce, adapt, distribute, perform, display,
46 | communicate, and translate a Work;
47 | ii. moral rights retained by the original author(s) and/or performer(s);
48 | iii. publicity and privacy rights pertaining to a person's image or
49 | likeness depicted in a Work;
50 | iv. rights protecting against unfair competition in regards to a Work,
51 | subject to the limitations in paragraph 4(a), below;
52 | v. rights protecting the extraction, dissemination, use and reuse of data
53 | in a Work;
54 | vi. database rights (such as those arising under Directive 96/9/EC of the
55 | European Parliament and of the Council of 11 March 1996 on the legal
56 | protection of databases, and under any national implementation
57 | thereof, including any amended or successor version of such
58 | directive); and
59 | vii. other similar, equivalent or corresponding rights throughout the
60 | world based on applicable law or treaty, and any national
61 | implementations thereof.
62 |
63 | 2. Waiver. To the greatest extent permitted by, but not in contravention
64 | of, applicable law, Affirmer hereby overtly, fully, permanently,
65 | irrevocably and unconditionally waives, abandons, and surrenders all of
66 | Affirmer's Copyright and Related Rights and associated claims and causes
67 | of action, whether now known or unknown (including existing as well as
68 | future claims and causes of action), in the Work (i) in all territories
69 | worldwide, (ii) for the maximum duration provided by applicable law or
70 | treaty (including future time extensions), (iii) in any current or future
71 | medium and for any number of copies, and (iv) for any purpose whatsoever,
72 | including without limitation commercial, advertising or promotional
73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
74 | member of the public at large and to the detriment of Affirmer's heirs and
75 | successors, fully intending that such Waiver shall not be subject to
76 | revocation, rescission, cancellation, termination, or any other legal or
77 | equitable action to disrupt the quiet enjoyment of the Work by the public
78 | as contemplated by Affirmer's express Statement of Purpose.
79 |
80 | 3. Public License Fallback. Should any part of the Waiver for any reason
81 | be judged legally invalid or ineffective under applicable law, then the
82 | Waiver shall be preserved to the maximum extent permitted taking into
83 | account Affirmer's express Statement of Purpose. In addition, to the
84 | extent the Waiver is so judged Affirmer hereby grants to each affected
85 | person a royalty-free, non transferable, non sublicensable, non exclusive,
86 | irrevocable and unconditional license to exercise Affirmer's Copyright and
87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the
88 | maximum duration provided by applicable law or treaty (including future
89 | time extensions), (iii) in any current or future medium and for any number
90 | of copies, and (iv) for any purpose whatsoever, including without
91 | limitation commercial, advertising or promotional purposes (the
92 | "License"). The License shall be deemed effective as of the date CC0 was
93 | applied by Affirmer to the Work. Should any part of the License for any
94 | reason be judged legally invalid or ineffective under applicable law, such
95 | partial invalidity or ineffectiveness shall not invalidate the remainder
96 | of the License, and in such case Affirmer hereby affirms that he or she
97 | will not (i) exercise any of his or her remaining Copyright and Related
98 | Rights in the Work or (ii) assert any associated claims and causes of
99 | action with respect to the Work, in either case contrary to Affirmer's
100 | express Statement of Purpose.
101 |
102 | 4. Limitations and Disclaimers.
103 |
104 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
105 | surrendered, licensed or otherwise affected by this document.
106 | b. Affirmer offers the Work as-is and makes no representations or
107 | warranties of any kind concerning the Work, express, implied,
108 | statutory or otherwise, including without limitation warranties of
109 | title, merchantability, fitness for a particular purpose, non
110 | infringement, or the absence of latent or other defects, accuracy, or
111 | the present or absence of errors, whether or not discoverable, all to
112 | the greatest extent permissible under applicable law.
113 | c. Affirmer disclaims responsibility for clearing rights of other persons
114 | that may apply to the Work or any use thereof, including without
115 | limitation any person's Copyright and Related Rights in the Work.
116 | Further, Affirmer disclaims responsibility for obtaining any necessary
117 | consents, permissions or other rights required for any use of the
118 | Work.
119 | d. Affirmer understands and acknowledges that Creative Commons is not a
120 | party to this document and has no duty or obligation with respect to
121 | this CC0 or use of the Work.
122 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # text_summarizer
2 |
3 | Summarize large amounts of text using LangChain, Ollama and Flask. Spins up a local Flask server and works offline, so no data leaves the computer.
4 |
5 | ## How to set up
6 |
7 | - Fork or clone this repository
8 | - Install Ollama for easy interaction with LLMs (`brew install ollama`), then follow the instructions to start the Ollama service (`brew services start ollama`)
9 | - Download llm models, e.g.:
10 | - `ollama pull dolphin-llama3`
11 | - `ollama pull llama3`
12 |
13 | Feel free to add and try out other models. For more info on models, visit [ollama library](https://ollama.com/library)
14 | - Recommended: create a new virtual environment with venv or VS Code
15 | - navigate to the local folder of the repo
16 | - install project dependencies (`pip install -r requirements.txt`)
17 |
18 | ## How to use
19 |
20 | - navigate to repo local folder, enter your venv environment if set up
21 | - run `python app.py`
22 | - - the interface starts on localhost, typically at <http://127.0.0.1:5000>
23 | - - Open the link in your browser
24 | - provide context to work from - either copy/paste or open a file (which will populate the input window)
25 | - Select model to use - dropdown will list those that are available in ollama.
26 | - You can pick from the predefined system prompts and questions, or edit them. It is crucial to include {context} and {input} in the system prompt; otherwise the model won't know what data to base the answer on or what the question is
27 | - Submit and... patience! The time taken depends heavily on your system's performance and on the size of the text
28 | - You'll get the generated response and the request metadata in a page
29 |
--------------------------------------------------------------------------------
/analyzer.py:
--------------------------------------------------------------------------------
1 | from langchain_community.llms import Ollama
2 | from langchain_core.prompts import PromptTemplate
3 | from langchain_core.documents import Document
4 | from langchain_community.embeddings import OllamaEmbeddings
5 | from langchain_community.vectorstores import FAISS
6 | from langchain_text_splitters import SentenceTransformersTokenTextSplitter
7 | from langchain.chains.combine_documents import create_stuff_documents_chain
8 | from langchain.chains import create_retrieval_chain
9 | import datetime
10 |
11 | #from langchain_community.document_loaders import WebBaseLoader
12 |
def get_response(content, question, model, systemprompt):
    """Answer ``question`` about ``content`` with a local Ollama model.

    The text is wrapped in a single ``Document``, split into chunks, embedded
    into a FAISS vector store and queried through a LangChain retrieval chain
    whose prompt is built from ``systemprompt``.

    Args:
        content: Raw text used as the context to answer from.
        question: Question handed to the chain under the ``input`` key.
        model: Ollama model name, used for both generation and embeddings.
        systemprompt: Prompt template text. The project's notes say it should
            contain the ``{context}`` and ``{input}`` placeholders.

    Returns:
        A dict with the generated ``'response'`` and a ``'metadata'`` dict
        (timestamps, model, prompt, question and a possibly shortened copy of
        the context). If building the ``Document`` fails, the exception text
        is returned as a plain string instead.
    """
    starttime = datetime.datetime.now()

    try:
        docs = [Document(page_content=content)]
    except Exception as e:
        # NOTE(review): this path returns a str while the success path returns
        # a dict; callers that index the result should be checked.
        return str(e)

    llm = Ollama(model=model)
    embeddings = OllamaEmbeddings(model=model)
    text_splitter = SentenceTransformersTokenTextSplitter()
    documents = text_splitter.split_documents(docs)
    vector = FAISS.from_documents(documents, embeddings)

    prompt = PromptTemplate.from_template(systemprompt)

    document_chain = create_stuff_documents_chain(llm, prompt)

    retriever = vector.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    response = retrieval_chain.invoke({"input": question})

    # NOTE(review): the size check uses 100000 characters but the slice keeps
    # only the first 10000 -- confirm which limit is intended.
    if len(content) < 100000:
        shown_context = content
    else:
        shown_context = content[:10000] + ' \n\n [rest hidden as too large]'

    return {
        'response': response["answer"],
        'metadata': {
            'start time': starttime,
            'stop time': datetime.datetime.now(),
            'model used': model,
            'system prompt': systemprompt,
            # Raw string: same key text as before (backslashes included), but
            # without the invalid "\{" escape that newer Pythons warn about.
            r'question (\{input\})': question,
            'context': shown_context,
        },
    }
49 |
50 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, request, render_template, redirect, url_for, session
2 | from flask_session import Session
3 | from analyzer import get_response # Update this import based on where you handle your logic
4 | from references import *
5 |
# Flask application object shared by the route handlers in this module.
app = Flask(__name__)

# Configure Flask-Session with the 'filesystem' session type.
app.config.update(SESSION_TYPE='filesystem')

# Create the session extension first, then bind it to the application.
sess = Session()
sess.init_app(app)
10 |
@app.route('/', methods=['GET'])
def home():
    """Render the input form with the available system prompts and models."""
    # Both values come from the star import of ``references``.
    template_context = {
        'systemprompts': systemprompts,
        'models': availablemodels,
    }
    return render_template('index.html', **template_context)
14 |
@app.route('/submit', methods=['POST'])
def submit():
    """Run the analysis for the submitted form and redirect to the result.

    Reads ``text``, ``model``, ``system_prompt`` and ``question`` from the
    posted form, passes them to ``get_response`` and stores the result in the
    session under ``'response_object'``.

    Returns:
        A redirect to the ``response`` view on success, or the HTML-escaped
        exception text with HTTP status 500 if anything in the analysis fails.
    """
    # Local import keeps this handler self-contained.
    from html import escape

    # The route only accepts POST, so no extra method check is needed.
    text = request.form['text']
    requestedmodel = request.form['model']
    system_prompt = request.form['system_prompt']
    input_question = request.form['question']

    try:
        response_object = get_response(
            content=text,
            question=input_question,
            model=requestedmodel,
            systemprompt=system_prompt,
        )
        session['response_object'] = response_object
        return redirect(url_for('response'))
    except Exception as e:
        # Still reports the raw message for simplicity, but escaped (it may
        # echo user-supplied text) and with an error status instead of 200.
        return escape(str(e)), 500
30 |
31 |
@app.route('/response')
def response():
    """Show the most recent analysis result stored in the session.

    Returns:
        The rendered ``response.html`` page, or a redirect to the input form
        when the session holds no result yet.
    """
    # No default here: a missing entry must come back as None so the guard
    # below can actually trigger.
    response_object = session.get('response_object')
    if response_object is None:
        # url_for expects the endpoint name ('home'), not the URL path '/'.
        return redirect(url_for('home'))
    return render_template('response.html', response_object=response_object)
39 |
# Start the server (with debug mode on) only when this file is run directly.
if __name__ == "__main__":
    app.run(debug=True)
42 |
43 |
--------------------------------------------------------------------------------
/references.py:
--------------------------------------------------------------------------------
1 | # Edit these as needed. Always include {context} and {input} in the system_prompt if you want relevant responses.
2 |
3 | systemprompts = {
4 | "meeting": {
5 | "short_description": "Meeting summary",
6 | "system_prompt":
7 | """You're an efficient and detail oriented executive assistant. You never make things up and you only base your output on available context. Answer the following question based ONLY on the provided context:
8 |
9 |
{{ response_object['response'] }}
14 | 15 | 26 |