├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── publish-to-pypi.yml
│       └── test_main.yml
├── .gitignore
├── .python-version
├── LICENSE
├── Makefile
├── README.md
├── pytest.ini
├── setup.py
├── tests
│   ├── __init__.py
│   ├── analysers
│   │   ├── analyser_test.py
│   │   ├── gramformer_test.py
│   │   ├── grammarbot_test.py
│   │   ├── hf_checker_test.py
│   │   ├── hf_completion_test.py
│   │   ├── hf_instruction_checker_test.py
│   │   ├── languagetool_test.py
│   │   ├── ollama_test.py
│   │   └── openai_test.py
│   ├── conftest.py
│   ├── documents
│   │   ├── document_test.py
│   │   ├── latex_test.py
│   │   ├── markdown_test.py
│   │   ├── org_test.py
│   │   └── txt_test.py
│   ├── lsp_test_client
│   │   ├── __init__.py
│   │   ├── defaults.py
│   │   ├── lsp_run.py
│   │   ├── session.py
│   │   └── utils.py
│   ├── server_test.py
│   └── utils_test.py
└── textLSP
    ├── __init__.py
    ├── analysers
    │   ├── __init__.py
    │   ├── analyser.py
    │   ├── gramformer
    │   │   ├── __init__.py
    │   │   └── gramformer.py
    │   ├── grammarbot
    │   │   ├── __init__.py
    │   │   └── grammarbot.py
    │   ├── handler.py
    │   ├── hf_checker
    │   │   ├── __init__.py
    │   │   └── hf_checker.py
    │   ├── hf_completion
    │   │   ├── __init__.py
    │   │   └── hf_completion.py
    │   ├── hf_instruction_checker
    │   │   ├── __init__.py
    │   │   └── hf_instruction_checker.py
    │   ├── languagetool
    │   │   ├── __init__.py
    │   │   └── languagetool.py
    │   ├── ollama
    │   │   ├── __init__.py
    │   │   └── ollama.py
    │   └── openai
    │       ├── __init__.py
    │       └── openai.py
    ├── cli.py
    ├── documents
    │   ├── __init__.py
    │   ├── document.py
    │   ├── latex
    │   │   ├── __init__.py
    │   │   └── latex.py
    │   ├── markdown
    │   │   ├── __init__.py
    │   │   └── markdown.py
    │   ├── org
    │   │   ├── __init__.py
    │   │   └── org.py
    │   └── txt
    │       ├── __init__.py
    │       └── txt.py
    ├── nn_utils.py
    ├── server.py
    ├── types.py
    ├── utils.py
    └── workspace.py

/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: "pip"
 4 |     directory: "/"
 5 |     schedule:
 6 |       interval: "monthly"
 7 |     ignore:
 8 |       # newer versions no longer support compilation, but most packages are not on PyPI yet (#29)
 9 |       - dependency-name: "tree_sitter"
10 |     groups:
11 |       python-packages:
12 |         patterns:
13 |           - "*"
--------------------------------------------------------------------------------
/.github/workflows/publish-to-pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Publish Python 🐍 distributions 📦 to PyPI
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - 'v*.*.*'
 7 | 
 8 | jobs:
 9 |   build-n-publish:
10 |     name: Build and publish Python 🐍 distributions 📦 to PyPI
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@v3
14 |       - name: Set up Python
15 |         uses: actions/setup-python@v4
16 |       - name: Install dependencies
17 |         run: |
18 |           python -m pip install --upgrade pip setuptools
19 |           make install-test
20 |       - name: Test with pytest
21 |         run: |
22 |           make test
23 |       - name: Install wheel
24 |         run: >-
25 |           python -m
26 |           pip install wheel
27 |       - name: Build a source tarball and a binary wheel
28 |         run: >-
29 |           python setup.py sdist bdist_wheel
30 |       - name: Publish distribution 📦 to PyPI
31 |         uses: pypa/gh-action-pypi-publish@release/v1
32 |         with:
33 |           password: ${{ secrets.PYPI_API_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/test_main.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies and run tests with a single version of Python.
2 | 3 | name: Test main branch 4 | 5 | on: 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Set up Python 18 | uses: actions/setup-python@v4 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip setuptools 22 | make install-test 23 | - name: Test with pytest 24 | run: | 25 | make test 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # custom 132 | coverage.json 133 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | pip install . 
 3 | 
 4 | install-development:
 5 | 	pip install -e .[dev,transformers]
 6 | 
 7 | install-test:
 8 | 	pip install .[dev,transformers]
 9 | 
10 | uninstall:
11 | 	pip uninstall textLSP
12 | 
13 | test:
14 | 	pytest --cov=textLSP
15 | 	coverage json
16 | 	coverage-threshold --file-line-coverage-min 1 --line-coverage-min 0
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # textLSP
 2 | 
 3 | A language server for spelling and grammar checking of text documents, using various AI tools.
 4 | 
 5 | _This tool is in early development._
 6 | 
 7 | ![textLSP](https://user-images.githubusercontent.com/414596/219856412-8095caa5-9ce6-49fe-9713-78d234837ac4.png)
 8 | 
 9 | ## Features
10 | 
11 | ### LSP features
12 | 
13 | * Diagnostics:
14 |   * spelling or grammatical errors
15 | * Code actions:
16 |   * Fix suggestions
17 |   * Analyze a paragraph with a selected passive analyzer (if the analyzer does not check on save or change)
18 | 
21 |   * Only on the first character of the first line: analyze the whole document if it has not been fully checked yet
22 | 
25 |   * Custom actions defined by a given analyzer (e.g. OpenAI text generation)
26 | 
29 | * Context-based word suggestion (see the sketch below)
30 | 
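This feature is provided by completion-style analysers such as hf_completion (see below). A minimal sketch of the underlying `fill-mask` idea (illustrative only; the example sentence and model choice are assumptions, not textLSP internals):

```python
from transformers import pipeline

# A masked language model proposes words that fit the surrounding context.
fill = pipeline("fill-mask", model="bert-base-multilingual-cased")
for candidate in fill("The quick brown fox jumps over the lazy [MASK]."):
    print(candidate["token_str"], round(candidate["score"], 3))
```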
33 | 
34 | ## Analyzers
35 | 
36 | ### Local tools
37 | 
38 | The following tools run on the local system:
39 | 
40 | * [LanguageTool](https://languagetool.org): Mainly for development purposes; see [ltex-ls](https://github.com/valentjn/ltex-ls) for a more mature implementation.
41 | * [Ollama](https://www.ollama.com): Run LLMs efficiently on your local machine.
42 |   It supports diagnostics, code actions and prompt-based text generation.
43 |   * Ollama needs to be [installed manually](https://www.ollama.com/download) first.
44 |   * Various [LLMs](https://www.ollama.com/library) are supported, such as `Llama 3`, `Gemma` or `Mixtral`. The suggested model is `Phi3`, due to its speed, size and accuracy.
45 | * hf_checker: Huggingface `text2text-generation` pipeline-based analyser. See the [flan-t5-large-grammar-synthesis](https://huggingface.co/pszemraj/flan-t5-large-grammar-synthesis) model for an example.
46 | 
55 | * hf_instruction_checker: Huggingface `text2text-generation` pipeline-based
56 |   analyser using instruction-tuned models. See Grammarly's
57 |   [CoEdIT](https://github.com/vipulraheja/coedit) model for an example. Supports
58 |   error checking and text generation, such as paraphrasing, through the `%HF%`
59 |   magic command (see the OpenAI analyser below and the sketch that follows).
60 | 
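  A sketch of the underlying idea (not textLSP's internal API; the instruction
  prompt follows the CoEdIT model card and is only illustrative):

  ```python
  from transformers import pipeline

  # An instruction-tuned text2text model rewrites the input sentence;
  # diffing the rewrite against the input yields correction suggestions.
  checker = pipeline("text2text-generation", model="grammarly/coedit-large")
  result = checker("Fix grammatical errors in this sentence: This sentences has bads grammar.")
  print(result[0]["generated_text"])
  ```

  The plain hf_checker works the same way, except that the model is prompted
  with the text alone, without an instruction prefix.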
71 | * [hf_completion](https://huggingface.co/docs/transformers/task_summary#language-modeling): Huggingface `fill-mask` pipeline-based text completion.
72 | * [Gramformer](https://github.com/PrithivirajDamodaran/Gramformer): Neural-network-based system.
73 | 
74 | ### Tools using remote services
75 | 
76 | **DISCLAIMER: THE RELATED APIS REQUIRE REGISTRATION AND ARE NOT FREE TO USE! USE THESE ANALYZERS AT YOUR OWN RISK! THE AUTHORS OF TEXTLSP DO NOT ASSUME ANY RESPONSIBILITY FOR THE COSTS INCURRED!**
77 | 
78 | The following tools use remote text APIs.
79 | Due to potential costs, turning off automatic analysis is suggested.
80 | 
81 | * [OpenAI](https://openai.com/api): Supports text correction as well as text generation through a magic command in the text file (see the example below).
82 |   * A custom URL can be set to use an OpenAI-compatible server. See the example
83 |     [configuration](#configuration) below.
84 | 
85 | 
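  A hypothetical example of the magic command in a text file (the `%OPENAI%`
  prefix is assumed by analogy with the `%HF%` command above, and the prompt
  text is illustrative):

  ```
  %OPENAI% Write a short sentence about language servers.
  ```

  Triggering the analyser's custom code action on such a command is expected
  to insert the generated text into the document.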
 88 | * [GrammarBot](https://rapidapi.com/grammarbot/api/grammarbot): The GrammarBot API provides spelling and grammar checking.
 89 | 
 90 | ## Supported File Types
 91 | 
 92 | * latex
 93 | * org
 94 | * markdown
 95 | * any other file type, handled as plain text
 96 | 
 97 | ## Setup
 98 | 
 99 | ### Install
100 | ```
101 | pip install textLSP
102 | ```
103 | 
104 | For the latest version:
105 | ```
106 | pip install git+https://github.com/hangyav/textLSP
107 | ```
108 | 
109 | #### Additional dependencies
110 | Some analyzers need additional dependencies!
111 | 
112 | * hf_checker, hf_instruction_checker and hf_completion:
113 | ```
114 | pip install textLSP[transformers]
115 | ```
116 | 
117 | * Gramformer needs to be installed manually:
118 | ```
119 | pip install git+https://github.com/PrithivirajDamodaran/Gramformer.git
120 | ```
121 | 
122 | ### Running
123 | Simply run:
124 | ```
125 | textlsp
126 | ```
127 | 
128 | Since some analyzers are computationally intensive, consider running it on a server using the TCP interface:
129 | ```
130 | textlsp --address 0.0.0.0 --port 1234
131 | ```
132 | or simply over SSH (with an SSH key) if the client doesn't support TCP:
133 | ```
134 | ssh <server> textlsp
135 | ```
136 | 
137 | ### Configuration
138 | 
139 | Using textLSP within an editor depends on the editor of choice.
140 | For a few examples of how to set up language servers in some popular editors, see [here](https://github.com/openlawlibrary/pygls/tree/master/examples/hello-world#editor-configurations) or take a look at the related documentation of your editor.
141 | 
142 | By default, all analyzers are disabled in textLSP; they have to be turned on in the settings.
143 | Example configuration in lua for nvim (other editors should be set up accordingly):
144 | 
145 | ```lua
146 | textLSP = {
147 |     analysers = {
148 |         languagetool = {
149 |             enabled = true,
150 |             check_text = {
151 |                 on_open = true,
152 |                 on_save = true,
153 |                 on_change = false,
154 |             }
155 |         },
156 |         ollama = {
157 |             enabled = true,
158 |             check_text = {
159 |                 on_open = false,
160 |                 on_save = true,
161 |                 on_change = false,
162 |             },
163 |             model = "phi3:3.8b-instruct", -- smaller but faster model
164 |             -- model = "phi3:14b-instruct", -- more accurate
165 |             max_token = 50,
166 |         },
167 |         gramformer = {
168 |             -- the gramformer dependency needs to be installed manually
169 |             enabled = false,
170 |             gpu = false,
171 |             check_text = {
172 |                 on_open = false,
173 |                 on_save = true,
174 |                 on_change = false,
175 |             }
176 |         },
177 |         hf_checker = {
178 |             enabled = false,
179 |             gpu = false,
180 |             quantize = 32,
181 |             model = 'pszemraj/flan-t5-large-grammar-synthesis',
182 |             min_length = 40,
183 |             check_text = {
184 |                 on_open = false,
185 |                 on_save = true,
186 |                 on_change = false,
187 |             }
188 |         },
189 |         hf_instruction_checker = {
190 |             enabled = false,
191 |             gpu = false,
192 |             quantize = 32,
193 |             model = 'grammarly/coedit-large',
194 |             min_length = 40,
195 |             check_text = {
196 |                 on_open = false,
197 |                 on_save = true,
198 |                 on_change = false,
199 |             }
200 |         },
201 |         hf_completion = {
202 |             enabled = false,
203 |             gpu = false,
204 |             quantize = 32,
205 |             model = 'bert-base-multilingual-cased',
206 |             topk = 5,
207 |         },
208 |         openai = {
209 |             enabled = false,
210 |             api_key = '',
211 |             -- url = '' -- optional, to use an OpenAI-compatible server
212 |             check_text = {
213 |                 on_open = false,
214 |                 on_save = false,
215 |                 on_change = false,
216 |             },
217 |             model = 'gpt-3.5-turbo',
218 |             max_token = 16,
219 |         },
220 |         grammarbot = {
221 |             enabled = false,
222 |             api_key = '',
223 |             -- longer texts are split; this parameter sets the maximum number of splits per analysis
224 |             input_max_requests = 1,
225 |             check_text = {
226 |                 on_open = false,
227 |                 on_save = false,
228 |                 on_change = false,
229 |             }
230 |         },
231 |     },
232 |     documents = {
233 |         -- the language of the documents; can be set to `auto` or `auto:<language>`
234 |         -- to detect it automatically, default: auto:en
235 |         language = "auto:en",
236 |         -- do not autodetect the language of documents with fewer characters than this
237 |         min_length_language_detect = 20,
238 |         org = {
239 |             org_todo_keywords = {
240 |                 'TODO',
241 |                 'IN_PROGRESS',
242 |                 'DONE'
243 |             },
244 |         },
245 |         txt = {
246 |             parse = true,
247 |         },
248 |     },
249 | }
250 | ```
251 | 
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
 1 | [pytest]
 2 | testpaths =
 3 |     tests
 4 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from setuptools import setup, find_packages
 3 | 
 4 | 
 5 | # Utility function to read the README file.
 6 | def read(fname):
 7 |     return open(os.path.join(os.path.dirname(__file__), fname)).read()
 8 | 
 9 | 
10 | setup(
11 |     name="textLSP",
12 |     version="0.3.2",
13 |     author="Viktor Hangya",
14 |     author_email="hangyav@gmail.com",
15 |     description=("A language server for spelling and grammar checking of text documents, using various tools."),
16 |     license="GPLv3",
17 |     url="https://github.com/hangyav/textLSP",
18 |     packages=find_packages(include=['textLSP*']),
19 |     long_description=read('README.md'),
20 |     long_description_content_type="text/markdown",
21 |     classifiers=[
22 |         "Programming Language :: Python :: 3",
23 |         "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
24 |         "Operating System :: OS Independent",
25 |     ],
26 |     entry_points={
27 |         'console_scripts': ['textlsp=textLSP.cli:main'],
28 |     },
29 |     install_requires=[
30 |         'pygls==1.3.1',
31 |         'lsprotocol==2023.0.1',
32 |         'language-tool-python==2.9.3',
33 |         'tree_sitter==0.21.3',
34 |         'gitpython==3.1.44',
35 |         'appdirs==1.4.4',
36 |         'openai==1.76.2',
37 |         'sortedcontainers==2.4.0',
38 |         'langdetect==1.0.9',
39 |         'ollama==0.4.8',
40 |     ],
41 |     extras_require={
42 |         'dev': [
43 |             'pytest==8.3.5',
44 |             'python-lsp-jsonrpc==1.1.2',
45 |             'pytest-cov==6.1.1',
46 |             'coverage-threshold==0.5.0'
47 |         ],
48 |         'transformers': [
49 |             'torch==2.7.0',
50 |             'transformers==4.51.3',
51 |             'bitsandbytes==0.45.5',
52 |         ],
53 |     },
54 | )
55 | 
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hangyav/textLSP/d23c638521847a0b9a6b9b864df2ccec14f1856e/tests/__init__.py
--------------------------------------------------------------------------------
/tests/analysers/analyser_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from threading import Event
 4 | from lsprotocol.types import (
 5 |     DidOpenTextDocumentParams,
 6 |     TextDocumentItem,
 7 |     DidChangeTextDocumentParams,
 8 |     VersionedTextDocumentIdentifier,
 9 |     TextDocumentContentChangeEvent_Type1,
10 |     Range,
11 |     Position,
12 |     DidSaveTextDocumentParams,
13 |     TextDocumentIdentifier,
14 |     CodeActionParams,
15 |     CodeActionContext,
16 |     Diagnostic,
17 | )
18 | 
19 | from tests.lsp_test_client import session, utils
20 | 
21 | 
22 | 
@pytest.mark.parametrize('text,edit,exp', [ 23 | ( 24 | 'This is a sentence.\n' 25 | 'This is a sAntence with an error.\n' 26 | 'And another sentence.', 27 | ( 28 | Range( 29 | start=Position(line=2, character=0), 30 | end=Position(line=2, character=0), 31 | ), 32 | '\n', 33 | False 34 | ), 35 | Range( 36 | start=Position(line=1, character=10), 37 | end=Position(line=1, character=18), 38 | ), 39 | ), 40 | ( 41 | 'This is a sentence.\n' 42 | 'This is a sAntence with an error.\n' 43 | 'And another sentence.', 44 | ( 45 | Range( 46 | start=Position(line=0, character=0), 47 | end=Position(line=0, character=0), 48 | ), 49 | '\n\n\n', 50 | True 51 | ), 52 | Range( 53 | start=Position(line=4, character=10), 54 | end=Position(line=4, character=18), 55 | ), 56 | ), 57 | ( 58 | 'This is a sentence.\n' 59 | 'This is a sAntence with an error.\n' 60 | 'And another sentence.', 61 | ( 62 | Range( 63 | start=Position(line=0, character=0), 64 | end=Position(line=1, character=0), 65 | ), 66 | '', 67 | True 68 | ), 69 | Range( 70 | start=Position(line=0, character=10), 71 | end=Position(line=0, character=18), 72 | ), 73 | ), 74 | ( 75 | 'This is a sentence.\n' 76 | 'This is a sAntence with an error.\n' 77 | 'And another sentence.', 78 | ( 79 | Range( 80 | start=Position(line=1, character=23), 81 | end=Position(line=1, character=23), 82 | ), 83 | '\n', 84 | False 85 | ), 86 | Range( 87 | start=Position(line=1, character=10), 88 | end=Position(line=1, character=18), 89 | ), 90 | ), 91 | ( 92 | 'This is a sentence.\n' 93 | 'This is a sAntence with an error.\n' 94 | 'And another sentence.', 95 | ( 96 | Range( 97 | start=Position(line=1, character=33), 98 | end=Position(line=1, character=33), 99 | ), 100 | ' too', 101 | False 102 | ), 103 | Range( 104 | start=Position(line=1, character=10), 105 | end=Position(line=1, character=18), 106 | ), 107 | ), 108 | ( 109 | 'This is a sentence.\n' 110 | 'This is a sAntence with an error.\n' 111 | 'And another sentence.', 112 | ( 113 | Range( 114 | start=Position(line=1, character=4), 115 | end=Position(line=1, character=4), 116 | ), 117 | ' word', 118 | True 119 | ), 120 | Range( 121 | start=Position(line=1, character=15), 122 | end=Position(line=1, character=23), 123 | ), 124 | ), 125 | ( 126 | 'This is a sentence.\n' 127 | 'This is a sAntence with an error.\n' 128 | 'And another sentence.', 129 | ( 130 | Range( 131 | start=Position(line=1, character=4), 132 | end=Position(line=1, character=4), 133 | ), 134 | '\n', 135 | True 136 | ), 137 | Range( 138 | start=Position(line=2, character=5), 139 | end=Position(line=2, character=13), 140 | ), 141 | ), 142 | ]) 143 | def test_line_shifts(text, edit, exp, json_converter, langtool_ls_onsave): 144 | done = Event() 145 | diag_lst = list() 146 | 147 | langtool_ls_onsave.set_notification_callback( 148 | session.PUBLISH_DIAGNOSTICS, 149 | utils.get_notification_handler( 150 | event=done, 151 | results=diag_lst 152 | ), 153 | ) 154 | 155 | open_params = DidOpenTextDocumentParams( 156 | TextDocumentItem( 157 | uri='dummy.txt', 158 | language_id='txt', 159 | version=1, 160 | text=text, 161 | ) 162 | ) 163 | 164 | langtool_ls_onsave.notify_did_open( 165 | json_converter.unstructure(open_params) 166 | ) 167 | assert done.wait(30) 168 | done.clear() 169 | 170 | change_params = DidChangeTextDocumentParams( 171 | text_document=VersionedTextDocumentIdentifier( 172 | version=1, 173 | uri='dummy.txt', 174 | ), 175 | content_changes=[ 176 | TextDocumentContentChangeEvent_Type1( 177 | edit[0], 178 | edit[1], 179 | ) 180 | ] 181 | ) 182 | 
langtool_ls_onsave.notify_did_change( 183 | json_converter.unstructure(change_params) 184 | ) 185 | ret = done.wait(1) 186 | done.clear() 187 | 188 | # no diagnostics notification if none has changed 189 | assert ret == edit[2] 190 | if edit[2]: 191 | assert len(diag_lst) == 2 192 | else: 193 | assert len(diag_lst) == 1 194 | 195 | res = diag_lst[-1]['diagnostics'][0]['range'] 196 | assert res == json_converter.unstructure(exp) 197 | 198 | diag = diag_lst[-1]['diagnostics'][0] 199 | diag = Diagnostic( 200 | range=Range( 201 | start=Position(**res['start']), 202 | end=Position(**res['end']), 203 | ), 204 | message=diag['message'], 205 | ) 206 | code_action_params = CodeActionParams( 207 | TextDocumentIdentifier('dummy.txt'), 208 | exp, 209 | CodeActionContext([diag]), 210 | ) 211 | actions_lst = langtool_ls_onsave.text_document_code_action( 212 | json_converter.unstructure(code_action_params) 213 | ) 214 | assert len(actions_lst) == 1 215 | res = actions_lst[-1]['diagnostics'][0]['range'] 216 | assert res == json_converter.unstructure(exp) 217 | 218 | 219 | @pytest.mark.parametrize('text,edit,exp', [ 220 | ( 221 | 'Introduction\n' 222 | '\n' 223 | 'This is a sentence.\n' 224 | 'This is another.\n' 225 | '\n' 226 | 'Thes is bold.', 227 | ( 228 | Range( 229 | start=Position(line=1, character=0), 230 | end=Position(line=1, character=0), 231 | ), 232 | '\n\n', 233 | ), 234 | Range( 235 | start=Position(line=7, character=0), 236 | end=Position(line=7, character=7), 237 | ), 238 | ), 239 | ]) 240 | def test_diagnostics_bug1(text, edit, exp, json_converter, langtool_ls_onsave): 241 | done = Event() 242 | results = list() 243 | 244 | langtool_ls_onsave.set_notification_callback( 245 | session.PUBLISH_DIAGNOSTICS, 246 | utils.get_notification_handler( 247 | event=done, 248 | results=results 249 | ), 250 | ) 251 | 252 | open_params = DidOpenTextDocumentParams( 253 | TextDocumentItem( 254 | uri='dummy.txt', 255 | language_id='txt', 256 | version=1, 257 | text=text, 258 | ) 259 | ) 260 | 261 | langtool_ls_onsave.notify_did_open( 262 | json_converter.unstructure(open_params) 263 | ) 264 | assert done.wait(30) 265 | done.clear() 266 | 267 | change_params = DidChangeTextDocumentParams( 268 | text_document=VersionedTextDocumentIdentifier( 269 | version=1, 270 | uri='dummy.txt', 271 | ), 272 | content_changes=[ 273 | TextDocumentContentChangeEvent_Type1( 274 | edit[0], 275 | edit[1], 276 | ) 277 | ] 278 | ) 279 | langtool_ls_onsave.notify_did_change( 280 | json_converter.unstructure(change_params) 281 | ) 282 | assert done.wait(30) 283 | done.clear() 284 | 285 | save_params = DidSaveTextDocumentParams( 286 | text_document=TextDocumentIdentifier( 287 | 'dummy.txt' 288 | ) 289 | ) 290 | langtool_ls_onsave.notify_did_save( 291 | json_converter.unstructure(save_params) 292 | ) 293 | assert done.wait(30) 294 | done.clear() 295 | 296 | res = results[-1]['diagnostics'][0]['range'] 297 | assert res == json_converter.unstructure(exp) 298 | 299 | 300 | def test_diagnostics_bug2(json_converter, langtool_ls_onsave): 301 | text = ('\\documentclass[11pt]{article}\n' 302 | + '\\begin{document}\n' 303 | + 'o\n' 304 | + '\\section{Thes}\n' 305 | + '\n' 306 | + 'This is a sentence.\n' 307 | + '\n' 308 | + '\\end{document}') 309 | 310 | done = Event() 311 | results = list() 312 | 313 | langtool_ls_onsave.set_notification_callback( 314 | session.PUBLISH_DIAGNOSTICS, 315 | utils.get_notification_handler( 316 | event=done, 317 | results=results 318 | ), 319 | ) 320 | 321 | open_params = DidOpenTextDocumentParams( 322 | 
TextDocumentItem( 323 | uri='dummy.tex', 324 | language_id='tex', 325 | version=1, 326 | text=text, 327 | ) 328 | ) 329 | 330 | langtool_ls_onsave.notify_did_open( 331 | json_converter.unstructure(open_params) 332 | ) 333 | assert done.wait(60) 334 | done.clear() 335 | 336 | change_params = DidChangeTextDocumentParams( 337 | text_document=VersionedTextDocumentIdentifier( 338 | version=1, 339 | uri='dummy.tex', 340 | ), 341 | content_changes=[ 342 | TextDocumentContentChangeEvent_Type1( 343 | Range( 344 | start=Position(line=2, character=0), 345 | end=Position(line=3, character=0), 346 | ), 347 | '', 348 | ) 349 | ] 350 | ) 351 | langtool_ls_onsave.notify_did_change( 352 | json_converter.unstructure(change_params) 353 | ) 354 | assert done.wait(60) 355 | done.clear() 356 | 357 | save_params = DidSaveTextDocumentParams( 358 | text_document=TextDocumentIdentifier( 359 | 'dummy.tex' 360 | ) 361 | ) 362 | langtool_ls_onsave.notify_did_save( 363 | json_converter.unstructure(save_params) 364 | ) 365 | assert done.wait(60) 366 | done.clear() 367 | 368 | change_params = DidChangeTextDocumentParams( 369 | text_document=VersionedTextDocumentIdentifier( 370 | version=2, 371 | uri='dummy.tex', 372 | ), 373 | content_changes=[ 374 | TextDocumentContentChangeEvent_Type1( 375 | Range( 376 | start=Position(line=1, character=16), 377 | end=Position(line=2, character=0), 378 | ), 379 | '\no\n', 380 | ) 381 | ] 382 | ) 383 | langtool_ls_onsave.notify_did_change( 384 | json_converter.unstructure(change_params) 385 | ) 386 | assert done.wait(60) 387 | done.clear() 388 | 389 | save_params = DidSaveTextDocumentParams( 390 | text_document=TextDocumentIdentifier( 391 | 'dummy.tex' 392 | ) 393 | ) 394 | langtool_ls_onsave.notify_did_save( 395 | json_converter.unstructure(save_params) 396 | ) 397 | assert done.wait(60) 398 | done.clear() 399 | 400 | exp_lst = [ 401 | Range( 402 | start=Position(line=2, character=0), 403 | end=Position(line=2, character=1), 404 | ), 405 | Range( 406 | start=Position(line=3, character=9), 407 | end=Position(line=3, character=13), 408 | ), 409 | ] 410 | res_lst = results[-1]['diagnostics'] 411 | assert len(res_lst) == len(exp_lst) 412 | for exp, res in zip(exp_lst, res_lst): 413 | assert res['range'] == json_converter.unstructure(exp) 414 | 415 | 416 | def test_diagnostics_bug3(json_converter, langtool_ls_onsave): 417 | text = ('Thiiiis is paragraph one.\n' 418 | '\n' 419 | '\n' 420 | '\n' 421 | 'Sentence one. 
Sentence two.\n') 422 | 423 | done = Event() 424 | results = list() 425 | 426 | langtool_ls_onsave.set_notification_callback( 427 | session.PUBLISH_DIAGNOSTICS, 428 | utils.get_notification_handler( 429 | event=done, 430 | results=results 431 | ), 432 | ) 433 | 434 | open_params = DidOpenTextDocumentParams( 435 | TextDocumentItem( 436 | uri='dummy.md', 437 | language_id='md', 438 | version=1, 439 | text=text, 440 | ) 441 | ) 442 | 443 | langtool_ls_onsave.notify_did_open( 444 | json_converter.unstructure(open_params) 445 | ) 446 | assert done.wait(30) 447 | done.clear() 448 | 449 | change_params = DidChangeTextDocumentParams( 450 | text_document=VersionedTextDocumentIdentifier( 451 | version=1, 452 | uri='dummy.md', 453 | ), 454 | content_changes=[ 455 | TextDocumentContentChangeEvent_Type1( 456 | range=Range( 457 | start=Position(line=2, character=0), 458 | end=Position(line=2, character=0) 459 | ), 460 | text='A' 461 | ), 462 | TextDocumentContentChangeEvent_Type1( 463 | range=Range( 464 | start=Position(line=2, character=1), 465 | end=Position(line=2, character=1) 466 | ), 467 | text='s' 468 | ), 469 | TextDocumentContentChangeEvent_Type1( 470 | range=Range( 471 | start=Position(line=2, character=2), 472 | end=Position(line=2, character=2) 473 | ), 474 | text='d' 475 | ), 476 | ] 477 | ) 478 | langtool_ls_onsave.notify_did_change( 479 | json_converter.unstructure(change_params) 480 | ) 481 | assert not done.wait(10) 482 | done.clear() 483 | 484 | save_params = DidSaveTextDocumentParams( 485 | text_document=TextDocumentIdentifier( 486 | 'dummy.md' 487 | ) 488 | ) 489 | langtool_ls_onsave.notify_did_save( 490 | json_converter.unstructure(save_params) 491 | ) 492 | assert done.wait(30) 493 | done.clear() 494 | 495 | exp_lst = [ 496 | Range( 497 | start=Position(line=0, character=0), 498 | end=Position(line=0, character=7), 499 | ), 500 | Range( 501 | start=Position(line=2, character=0), 502 | end=Position(line=2, character=3), 503 | ), 504 | ] 505 | res_lst = results[-1]['diagnostics'] 506 | assert len(res_lst) == len(exp_lst) 507 | for exp, res in zip(exp_lst, res_lst): 508 | assert res['range'] == json_converter.unstructure(exp) 509 | -------------------------------------------------------------------------------- /tests/analysers/gramformer_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | try: 4 | # gramformer is not on pypi thus not installed automatically 5 | from gramformer import Gramformer 6 | except ModuleNotFoundError: 7 | import sys 8 | import subprocess 9 | subprocess.check_call([ 10 | sys.executable, 11 | "-m", 12 | "pip", 13 | "install", 14 | 'git+https://github.com/PrithivirajDamodaran/Gramformer.git' 15 | ]) 16 | 17 | from textLSP.documents.document import BaseDocument 18 | from textLSP.analysers.gramformer import GramformerAnalyser 19 | 20 | 21 | @pytest.fixture 22 | def analyser(): 23 | return GramformerAnalyser( 24 | None, 25 | {}, 26 | 'gramformer', 27 | ) 28 | 29 | 30 | def test_analyse(analyser): 31 | doc = BaseDocument( 32 | 'tmp.txt', 33 | 'This is a santance. 
And another.', 34 | config={}, 35 | version=0 36 | ) 37 | analyser._analyse_sentences(doc.cleaned_source, doc) 38 | -------------------------------------------------------------------------------- /tests/analysers/grammarbot_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from lsprotocol.types import Range, Position 4 | 5 | from textLSP.analysers.grammarbot import GrammarBotAnalyser 6 | from textLSP.documents.document import BaseDocument 7 | 8 | 9 | @pytest.fixture 10 | def analyser(): 11 | return GrammarBotAnalyser( 12 | None, 13 | {GrammarBotAnalyser.CONFIGURATION_API_KEY: 'DUMMY_KEY'}, 14 | 'grammarbot', 15 | ) 16 | 17 | 18 | @pytest.mark.parametrize('doc,analyses,text_sections,exp', [ 19 | ( 20 | BaseDocument( 21 | 'DUMMY_URL', 22 | 'This is a sentence.' 23 | ), 24 | [ 25 | { 26 | 'offset': 0, 27 | 'length': 5, 28 | 'message': 'test', 29 | 'rule': {'id': 'TEST'}, 30 | 'replacements': [], 31 | }, 32 | ], 33 | None, 34 | [ 35 | Range( 36 | start=Position( 37 | line=0, 38 | character=0 39 | ), 40 | end=Position( 41 | line=0, 42 | character=5 43 | ), 44 | 45 | ), 46 | ], 47 | ), 48 | ( 49 | BaseDocument( 50 | 'DUMMY_URL', 51 | 'This is a paragraph.\n\n' 52 | 'This is a paragraph.\n\n' 53 | 'This is a paragraph.\n\n' 54 | ), 55 | [ 56 | { 57 | 'offset': 0, 58 | 'length': 5, 59 | 'message': 'test', 60 | 'rule': {'id': 'TEST'}, 61 | 'replacements': [], 62 | }, 63 | ], 64 | [ 65 | (22, 21), # second paragraph 66 | ], 67 | [ 68 | Range( 69 | start=Position( 70 | line=2, 71 | character=0 72 | ), 73 | end=Position( 74 | line=2, 75 | character=5 76 | ), 77 | ), 78 | ], 79 | ), 80 | ( 81 | BaseDocument( 82 | 'DUMMY_URL', 83 | 'This is a paragraph.\n\n' 84 | 'This is a paragraph.\n\n' 85 | 'This is a paragraph.\n\n' 86 | ), 87 | [ 88 | { 89 | 'offset': 5, 90 | 'length': 2, 91 | 'message': 'test', 92 | 'rule': {'id': 'TEST'}, 93 | 'replacements': [], 94 | }, 95 | ], 96 | [ 97 | (22, 21), # second paragraph 98 | ], 99 | [ 100 | Range( 101 | start=Position( 102 | line=2, 103 | character=5 104 | ), 105 | end=Position( 106 | line=2, 107 | character=7 108 | ), 109 | ), 110 | ], 111 | ), 112 | ( 113 | BaseDocument( 114 | 'DUMMY_URL', 115 | 'This is a paragraph.\n\n' 116 | 'This is a paragraph.\n\n' 117 | 'This is a paragraph.\n\n' 118 | ), 119 | [ 120 | { 121 | 'offset': 5, 122 | 'length': 2, 123 | 'message': 'test', 124 | 'rule': {'id': 'TEST'}, 125 | 'replacements': [], 126 | }, 127 | { 128 | 'offset': 5 + len('This is a paragraph.\n'), 129 | 'length': 2, 130 | 'message': 'test', 131 | 'rule': {'id': 'TEST'}, 132 | 'replacements': [], 133 | }, 134 | ], 135 | [ 136 | (0, len('This is a paragraph.\n')), 137 | # third paragraph 138 | ( 139 | len('This is a paragraph.\n\nThis is a paragraph.\n\n'), 140 | 2*len('This is a paragraph.\n') 141 | ), 142 | ], 143 | [ 144 | Range( 145 | start=Position( 146 | line=0, 147 | character=5 148 | ), 149 | end=Position( 150 | line=0, 151 | character=7 152 | ), 153 | ), 154 | Range( 155 | start=Position( 156 | line=4, 157 | character=5 158 | ), 159 | end=Position( 160 | line=4, 161 | character=7 162 | ), 163 | ), 164 | ], 165 | ), 166 | ]) 167 | def test_analyses(doc, analyses, text_sections, exp, analyser): 168 | res_diag, res_action = analyser._handle_analyses(doc, analyses, text_sections) 169 | 170 | assert [diag.range for diag in res_diag] == exp 171 | -------------------------------------------------------------------------------- /tests/analysers/hf_checker_test.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from textLSP.analysers.hf_checker import HFCheckerAnalyser 4 | from textLSP.documents.document import BaseDocument 5 | 6 | 7 | @pytest.fixture 8 | def analyser(): 9 | return HFCheckerAnalyser( 10 | None, 11 | { 12 | HFCheckerAnalyser.CONFIGURATION_MIN_LENGTH: 40, 13 | HFCheckerAnalyser.CONFIGURATION_MODEL: 'pszemraj/grammar-synthesis-small', 14 | }, 15 | 'hf_checker', 16 | ) 17 | 18 | 19 | @pytest.mark.parametrize('doc,exp', [ 20 | ( 21 | BaseDocument( 22 | 'DUMMY_URL', 23 | 'This is a short sentence.', 24 | version=1, 25 | ), 26 | False, 27 | ), 28 | ( 29 | BaseDocument( 30 | 'DUMMY_URL', 31 | 'This is a long enough sentence with an eror or tvo.', 32 | version=1, 33 | ), 34 | True, 35 | ), 36 | ]) 37 | def test_simple(doc, exp, analyser): 38 | res_diag, res_action = analyser._analyse_lines(doc.cleaned_source, doc) 39 | 40 | if exp: 41 | assert len(res_diag) > 0 42 | assert len(res_action) > 0 43 | else: 44 | assert len(res_diag) == 0 45 | assert len(res_action) == 0 46 | -------------------------------------------------------------------------------- /tests/analysers/hf_completion_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from textLSP.analysers.hf_completion import HFCompletionAnalyser 4 | 5 | 6 | @pytest.fixture 7 | def analyser(): 8 | return HFCompletionAnalyser( 9 | None, 10 | {}, 11 | 'hf_completion', 12 | ) 13 | 14 | 15 | def test_simple(analyser): 16 | text = 'The next word should be ' 17 | analyser._get_text_completions(text, len(text)-1) 18 | -------------------------------------------------------------------------------- /tests/analysers/hf_instruction_checker_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from textLSP.analysers.hf_instruction_checker import HFInstructionCheckerAnalyser 4 | from textLSP.documents.document import BaseDocument 5 | 6 | 7 | @pytest.fixture 8 | def analyser(): 9 | return HFInstructionCheckerAnalyser( 10 | None, 11 | { 12 | HFInstructionCheckerAnalyser.CONFIGURATION_MODEL: 'grammarly/coedit-large', 13 | }, 14 | 'hf_checker', 15 | ) 16 | 17 | 18 | @pytest.mark.parametrize('doc,exp', [ 19 | ( 20 | BaseDocument( 21 | 'DUMMY_URL', 22 | 'This is a short sentence.', 23 | version=1, 24 | ), 25 | False, 26 | ), 27 | ( 28 | BaseDocument( 29 | 'DUMMY_URL', 30 | 'This is a long enough sentence with an eror or tvo.', 31 | version=1, 32 | ), 33 | True, 34 | ), 35 | ]) 36 | def test_simple(doc, exp, analyser): 37 | res_diag, res_action = analyser._analyse_lines(doc.cleaned_source, doc) 38 | 39 | if exp: 40 | assert len(res_diag) > 0 41 | assert len(res_action) > 0 42 | else: 43 | assert len(res_diag) == 0 44 | assert len(res_action) == 0 45 | -------------------------------------------------------------------------------- /tests/analysers/languagetool_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from threading import Event 4 | from lsprotocol.types import ( 5 | DidOpenTextDocumentParams, 6 | TextDocumentItem, 7 | DidChangeTextDocumentParams, 8 | VersionedTextDocumentIdentifier, 9 | TextDocumentContentChangeEvent_Type1, 10 | Range, 11 | Position, 12 | DidSaveTextDocumentParams, 13 | TextDocumentIdentifier, 14 | ) 15 | from textLSP.documents.document import BaseDocument 16 | from textLSP.analysers.languagetool import LanguageToolAnalyser 17 | 18 | from 
tests.lsp_test_client import session, utils 19 | 20 | 21 | @pytest.fixture 22 | def analyser(): 23 | return LanguageToolAnalyser( 24 | None, 25 | {}, 26 | 'languagetool', 27 | ) 28 | 29 | 30 | def test_analyse(analyser): 31 | doc = BaseDocument( 32 | 'tmp.txt', 33 | 'This is a santance.', 34 | config={}, 35 | version=0 36 | ) 37 | analyser._analyse(doc.cleaned_source, doc) 38 | 39 | 40 | def test_bug1(json_converter, langtool_ls_onsave): 41 | text = ('\\documentclass[11pt]{article}\n' 42 | + '\\begin{document}\n' 43 | + '\n' 44 | + '\\section{Introduction}\n' 45 | + '\n' 46 | + 'This is a sentence.\n' 47 | + '\n' 48 | + '\\end{document}') 49 | 50 | done = Event() 51 | results = list() 52 | 53 | langtool_ls_onsave.set_notification_callback( 54 | session.WINDOW_SHOW_MESSAGE, 55 | utils.get_notification_handler( 56 | event=done, 57 | results=results 58 | ), 59 | ) 60 | 61 | open_params = DidOpenTextDocumentParams( 62 | TextDocumentItem( 63 | uri='dummy.tex', 64 | language_id='tex', 65 | version=1, 66 | text=text, 67 | ) 68 | ) 69 | 70 | langtool_ls_onsave.notify_did_open( 71 | json_converter.unstructure(open_params) 72 | ) 73 | 74 | change_params = DidChangeTextDocumentParams( 75 | text_document=VersionedTextDocumentIdentifier( 76 | version=1, 77 | uri='dummy.tex', 78 | ), 79 | content_changes=[ 80 | TextDocumentContentChangeEvent_Type1( 81 | Range( 82 | start=Position(line=5, character=19), 83 | end=Position(line=6, character=0), 84 | ), 85 | '\nThis is a sentence.\n', 86 | ) 87 | ] 88 | ) 89 | langtool_ls_onsave.notify_did_change( 90 | json_converter.unstructure(change_params) 91 | ) 92 | 93 | change_params = DidChangeTextDocumentParams( 94 | text_document=VersionedTextDocumentIdentifier( 95 | version=2, 96 | uri='dummy.tex', 97 | ), 98 | content_changes=[ 99 | TextDocumentContentChangeEvent_Type1( 100 | Range( 101 | start=Position(line=6, character=19), 102 | end=Position(line=7, character=0), 103 | ), 104 | '\nThis is a sentence.\n', 105 | ) 106 | ] 107 | ) 108 | langtool_ls_onsave.notify_did_change( 109 | json_converter.unstructure(change_params) 110 | ) 111 | 112 | save_params = DidSaveTextDocumentParams( 113 | text_document=TextDocumentIdentifier( 114 | 'dummy.tex' 115 | ) 116 | ) 117 | langtool_ls_onsave.notify_did_save( 118 | json_converter.unstructure(save_params) 119 | ) 120 | assert not done.wait(20) 121 | done.clear() 122 | 123 | 124 | def test_bug2(json_converter, langtool_ls_onsave): 125 | text = ( 126 | 'This is a sentence.\n' 127 | + 'This is a sentence.\n' 128 | + 'This is a sentence.\n' 129 | ) 130 | 131 | done = Event() 132 | results = list() 133 | 134 | langtool_ls_onsave.set_notification_callback( 135 | session.WINDOW_SHOW_MESSAGE, 136 | utils.get_notification_handler( 137 | event=done, 138 | results=results 139 | ), 140 | ) 141 | 142 | open_params = DidOpenTextDocumentParams( 143 | TextDocumentItem( 144 | uri='dummy.txt', 145 | language_id='txt', 146 | version=1, 147 | text=text, 148 | ) 149 | ) 150 | 151 | langtool_ls_onsave.notify_did_open( 152 | json_converter.unstructure(open_params) 153 | ) 154 | 155 | for i, edit_range in enumerate([ 156 | # Last two sentences deleted as done by nvim 157 | Range( 158 | start=Position(line=0, character=19), 159 | end=Position(line=0, character=19), 160 | ), 161 | Range( 162 | start=Position(line=1, character=0), 163 | end=Position(line=2, character=0), 164 | ), 165 | Range( 166 | start=Position(line=1, character=0), 167 | end=Position(line=1, character=19), 168 | ), 169 | Range( 170 | start=Position(line=0, character=19), 171 | 
end=Position(line=0, character=19), 172 | ), 173 | Range( 174 | start=Position(line=1, character=0), 175 | end=Position(line=2, character=0), 176 | ), 177 | ], 1): 178 | change_params = DidChangeTextDocumentParams( 179 | text_document=VersionedTextDocumentIdentifier( 180 | version=i, 181 | uri='dummy.txt', 182 | ), 183 | content_changes=[ 184 | TextDocumentContentChangeEvent_Type1( 185 | edit_range, 186 | '', 187 | ) 188 | ] 189 | ) 190 | langtool_ls_onsave.notify_did_change( 191 | json_converter.unstructure(change_params) 192 | ) 193 | 194 | save_params = DidSaveTextDocumentParams( 195 | text_document=TextDocumentIdentifier( 196 | 'dummy.txt' 197 | ) 198 | ) 199 | langtool_ls_onsave.notify_did_save( 200 | json_converter.unstructure(save_params) 201 | ) 202 | assert not done.wait(20) 203 | done.clear() 204 | -------------------------------------------------------------------------------- /tests/analysers/ollama_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from textLSP.analysers.ollama import OllamaAnalyser 4 | from textLSP.types import ConfigurationError 5 | 6 | 7 | def test_init(): 8 | # there's no easy way to test this. So this is just a 9 | # placeholder for test coverage. 10 | with pytest.raises(ConfigurationError): 11 | return OllamaAnalyser( 12 | None, 13 | {OllamaAnalyser.CONFIGURATION_MODEL: 'DUMMY_MODEL'}, 14 | 'ollama', 15 | ) 16 | -------------------------------------------------------------------------------- /tests/analysers/openai_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from textLSP.analysers.openai import OpenAIAnalyser 4 | from openai import AuthenticationError 5 | 6 | 7 | @pytest.fixture 8 | def analyser(): 9 | return OpenAIAnalyser( 10 | None, 11 | {OpenAIAnalyser.CONFIGURATION_API_KEY: 'DUMMY_KEY'}, 12 | 'openai', 13 | ) 14 | 15 | 16 | def test_edit(analyser): 17 | with pytest.raises(AuthenticationError): 18 | analyser._edit('This is as santance.') 19 | 20 | 21 | def test_generate(analyser): 22 | with pytest.raises(AuthenticationError): 23 | analyser._generate('Write me a sentence:') 24 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import copy 3 | 4 | from pygls.protocol import default_converter 5 | 6 | from tests.lsp_test_client import session, defaults 7 | 8 | 9 | @pytest.fixture 10 | def json_converter(): 11 | return default_converter() 12 | 13 | 14 | @pytest.fixture 15 | def simple_server(): 16 | with session.LspSession() as lsp_session: 17 | lsp_session.initialize() 18 | yield lsp_session 19 | 20 | 21 | @pytest.fixture 22 | def langtool_ls(): 23 | init_params = copy.deepcopy(defaults.VSCODE_DEFAULT_INITIALIZE) 24 | init_params["initializationOptions"] = { 25 | 'textLSP': { 26 | 'analysers': { 27 | 'languagetool': { 28 | 'enabled': True, 29 | 'check_text': { 30 | 'on_open': True, 31 | 'on_save': True, 32 | 'on_change': True, 33 | } 34 | } 35 | } 36 | } 37 | } 38 | 39 | with session.LspSession() as lsp_session: 40 | lsp_session.initialize(init_params) 41 | 42 | yield lsp_session 43 | 44 | 45 | @pytest.fixture 46 | def langtool_ls_onsave(): 47 | init_params = copy.deepcopy(defaults.VSCODE_DEFAULT_INITIALIZE) 48 | init_params["initializationOptions"] = { 49 | 'textLSP': { 50 | 'analysers': { 51 | 'languagetool': { 52 | 'enabled': True, 53 | 'check_text': { 54 | 
'on_open': True, 55 | 'on_save': True, 56 | 'on_change': False, 57 | } 58 | } 59 | } 60 | } 61 | } 62 | 63 | with session.LspSession() as lsp_session: 64 | lsp_session.initialize(init_params) 65 | 66 | yield lsp_session 67 | -------------------------------------------------------------------------------- /tests/documents/org_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from textLSP.documents.org import OrgDocument 4 | 5 | 6 | @pytest.mark.parametrize('src,clean', [ 7 | ( 8 | '** DONE Task 1 :TAG:\n' 9 | ' SCHEDULED: <2023-01-27 Fri> CLOSED: [2023-01-27 Fri 13:01]\n' 10 | ' - Level 1 list:\n' 11 | ' - Level 2 list 1\n' 12 | ' - Level 2 list 2', 13 | # 14 | 'Task 1\n' 15 | '\n' 16 | 'Level 1 list:\n' 17 | '\n' 18 | 'Level 2 list 1\n' 19 | '\n' 20 | 'Level 2 list 2\n' 21 | ), 22 | ( 23 | '** Task 1\n' 24 | ' This is a paragraph.\n' 25 | '** Task 2\n' 26 | ' This is a paragraph.\n', 27 | # 28 | 'Task 1\n' 29 | '\n' 30 | 'This is a paragraph.\n' 31 | '\n' 32 | 'Task 2\n' 33 | '\n' 34 | 'This is a paragraph.\n' 35 | ), 36 | ( 37 | '** Task 1\n' 38 | ' This is a paragraph.\n' 39 | ' This is another sentence in it.', 40 | # 41 | 'Task 1\n' 42 | '\n' 43 | 'This is a paragraph. This is another sentence in it.\n' 44 | ), 45 | ( 46 | '** Task 1\n' 47 | ' This is a paragraph.\n' 48 | '\n' 49 | ' This is another paragraph.', 50 | # 51 | 'Task 1\n' 52 | '\n' 53 | 'This is a paragraph.\n' 54 | '\n' 55 | 'This is another paragraph.\n' 56 | ), 57 | ( 58 | '** DONE Task DONE 1\n' 59 | ' * DONE This is a list.', 60 | # 61 | 'Task DONE 1\n' 62 | '\n' 63 | 'DONE This is a list.\n' 64 | ), 65 | ]) 66 | def test_clean(src, clean): 67 | doc = OrgDocument( 68 | 'tmp.org', 69 | src, 70 | config={OrgDocument.CONFIGURATION_TODO_KEYWORDS: {'DONE'}} 71 | ) 72 | 73 | assert doc.cleaned_source == clean 74 | -------------------------------------------------------------------------------- /tests/documents/txt_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from textLSP.documents.txt import TxtDocument 4 | 5 | 6 | @pytest.mark.parametrize('src,clean', [ 7 | ( 8 | 'This is a sentence.', 9 | # 10 | 'This is a sentence.', 11 | ), 12 | ( 13 | 'This is a sentence.\n', 14 | # 15 | 'This is a sentence.\n', 16 | ), 17 | ( 18 | '\n\nThis is a sentence.', 19 | # 20 | '\n\nThis is a sentence.', 21 | ), 22 | ( 23 | 'This is a sentence.\n' 24 | 'This is a sentence.', 25 | # 26 | 'This is a sentence.' 
27 | ' ' 28 | 'This is a sentence.', 29 | ), 30 | ( 31 | 'This is a sentence.\n' 32 | '\n' 33 | 'This is a sentence.', 34 | # 35 | 'This is a sentence.\n' 36 | '\n' 37 | 'This is a sentence.', 38 | ), 39 | ]) 40 | def test_clean(src, clean): 41 | doc = TxtDocument( 42 | 'tmp.txt', 43 | src, 44 | ) 45 | 46 | assert doc.cleaned_source == clean 47 | 48 | 49 | @pytest.mark.parametrize('src,offset,exp', [ 50 | ( 51 | 'This is a sentence.', 52 | # (offset, length) 53 | (0, 4), 54 | 'This', 55 | ), 56 | ( 57 | 'This is a sentence.', 58 | # (offset, length) 59 | (5, 4), 60 | 'is a', 61 | ), 62 | ( 63 | 'This is a sentence.\n' 64 | '\n' 65 | 'That is a file.', 66 | # (offset, length) 67 | (31, 4), 68 | 'file', 69 | ), 70 | ]) 71 | def test_highlight(src, offset, exp): 72 | doc = TxtDocument( 73 | 'tmp.txt', 74 | src, 75 | ) 76 | 77 | pos_range = doc.range_at_offset(offset[0], offset[1], True) 78 | 79 | lines = src.splitlines(True) 80 | if pos_range.start.line == pos_range.end.line: 81 | res = lines[pos_range.start.line][pos_range.start.character:pos_range.end.character+1] 82 | else: 83 | res = lines[pos_range.start.line][pos_range.start.character:] 84 | res += ''.join([lines[idx] for idx in range(pos_range.start.line+1, pos_range.end.line)]) 85 | res += lines[pos_range.end.line][:pos_range.end.character+1] 86 | 87 | assert res == exp 88 | -------------------------------------------------------------------------------- /tests/lsp_test_client/__init__.py: -------------------------------------------------------------------------------- 1 | # Taken from: https://github.com/pappasam/jedi-language-server 2 | """Test client main module.""" 3 | 4 | import py 5 | 6 | from .utils import as_uri 7 | 8 | TEST_ROOT = py.path.local(__file__) / ".." 9 | PROJECT_ROOT = TEST_ROOT / ".." / ".." 
10 | PROJECT_URI = as_uri(PROJECT_ROOT) 11 | -------------------------------------------------------------------------------- /tests/lsp_test_client/defaults.py: -------------------------------------------------------------------------------- 1 | """Default values for lsp test client.""" 2 | import os 3 | 4 | import tests.lsp_test_client as lsp_client 5 | 6 | VSCODE_DEFAULT_INITIALIZE = { 7 | "processId": os.getpid(), # pylint: disable=no-member 8 | "clientInfo": {"name": "vscode", "version": "1.45.0"}, 9 | "rootPath": str(lsp_client.PROJECT_ROOT), 10 | "rootUri": lsp_client.PROJECT_URI, 11 | "capabilities": { 12 | "workspace": { 13 | "applyEdit": True, 14 | "workspaceEdit": { 15 | "documentChanges": True, 16 | "resourceOperations": ["create", "rename", "delete"], 17 | "failureHandling": "textOnlyTransactional", 18 | }, 19 | "didChangeConfiguration": {"dynamicRegistration": True}, 20 | "didChangeWatchedFiles": {"dynamicRegistration": True}, 21 | "symbol": { 22 | "dynamicRegistration": True, 23 | "symbolKind": { 24 | "valueSet": [ 25 | 1, 26 | 2, 27 | 3, 28 | 4, 29 | 5, 30 | 6, 31 | 7, 32 | 8, 33 | 9, 34 | 10, 35 | 11, 36 | 12, 37 | 13, 38 | 14, 39 | 15, 40 | 16, 41 | 17, 42 | 18, 43 | 19, 44 | 20, 45 | 21, 46 | 22, 47 | 23, 48 | 24, 49 | 25, 50 | 26, 51 | ] 52 | }, 53 | "tagSupport": {"valueSet": [1]}, 54 | }, 55 | "executeCommand": {"dynamicRegistration": True}, 56 | "configuration": True, 57 | "workspaceFolders": True, 58 | }, 59 | "textDocument": { 60 | "publishDiagnostics": { 61 | "relatedInformation": True, 62 | "versionSupport": False, 63 | "tagSupport": {"valueSet": [1, 2]}, 64 | "complexDiagnosticCodeSupport": True, 65 | }, 66 | "synchronization": { 67 | "dynamicRegistration": True, 68 | "willSave": True, 69 | "willSaveWaitUntil": True, 70 | "didSave": True, 71 | }, 72 | "completion": { 73 | "dynamicRegistration": True, 74 | "contextSupport": True, 75 | "completionItem": { 76 | "snippetSupport": True, 77 | "commitCharactersSupport": True, 78 | "documentationFormat": ["markdown", "plaintext"], 79 | "deprecatedSupport": True, 80 | "preselectSupport": True, 81 | "tagSupport": {"valueSet": [1]}, 82 | "insertReplaceSupport": True, 83 | }, 84 | "completionItemKind": { 85 | "valueSet": [ 86 | 1, 87 | 2, 88 | 3, 89 | 4, 90 | 5, 91 | 6, 92 | 7, 93 | 8, 94 | 9, 95 | 10, 96 | 11, 97 | 12, 98 | 13, 99 | 14, 100 | 15, 101 | 16, 102 | 17, 103 | 18, 104 | 19, 105 | 20, 106 | 21, 107 | 22, 108 | 23, 109 | 24, 110 | 25, 111 | ] 112 | }, 113 | }, 114 | "hover": { 115 | "dynamicRegistration": True, 116 | "contentFormat": ["markdown", "plaintext"], 117 | }, 118 | "signatureHelp": { 119 | "dynamicRegistration": True, 120 | "signatureInformation": { 121 | "documentationFormat": ["markdown", "plaintext"], 122 | "parameterInformation": {"labelOffsetSupport": True}, 123 | }, 124 | "contextSupport": True, 125 | }, 126 | "definition": {"dynamicRegistration": True, "linkSupport": True}, 127 | "references": {"dynamicRegistration": True}, 128 | "documentHighlight": {"dynamicRegistration": True}, 129 | "documentSymbol": { 130 | "dynamicRegistration": True, 131 | "symbolKind": { 132 | "valueSet": [ 133 | 1, 134 | 2, 135 | 3, 136 | 4, 137 | 5, 138 | 6, 139 | 7, 140 | 8, 141 | 9, 142 | 10, 143 | 11, 144 | 12, 145 | 13, 146 | 14, 147 | 15, 148 | 16, 149 | 17, 150 | 18, 151 | 19, 152 | 20, 153 | 21, 154 | 22, 155 | 23, 156 | 24, 157 | 25, 158 | 26, 159 | ] 160 | }, 161 | "hierarchicalDocumentSymbolSupport": True, 162 | "tagSupport": {"valueSet": [1]}, 163 | }, 164 | "codeAction": { 165 | "dynamicRegistration": True, 
166 | "isPreferredSupport": True, 167 | "codeActionLiteralSupport": { 168 | "codeActionKind": { 169 | "valueSet": [ 170 | "", 171 | "quickfix", 172 | "refactor", 173 | "refactor.extract", 174 | "refactor.inline", 175 | "refactor.rewrite", 176 | "source", 177 | "source.organizeImports", 178 | ] 179 | } 180 | }, 181 | }, 182 | "codeLens": {"dynamicRegistration": True}, 183 | "formatting": {"dynamicRegistration": True}, 184 | "rangeFormatting": {"dynamicRegistration": True}, 185 | "onTypeFormatting": {"dynamicRegistration": True}, 186 | "rename": {"dynamicRegistration": True, "prepareSupport": True}, 187 | "documentLink": { 188 | "dynamicRegistration": True, 189 | "tooltipSupport": True, 190 | }, 191 | "typeDefinition": { 192 | "dynamicRegistration": True, 193 | "linkSupport": True, 194 | }, 195 | "implementation": { 196 | "dynamicRegistration": True, 197 | "linkSupport": True, 198 | }, 199 | "colorProvider": {"dynamicRegistration": True}, 200 | "foldingRange": { 201 | "dynamicRegistration": True, 202 | "rangeLimit": 5000, 203 | "lineFoldingOnly": True, 204 | }, 205 | "declaration": {"dynamicRegistration": True, "linkSupport": True}, 206 | "selectionRange": {"dynamicRegistration": True}, 207 | }, 208 | "window": {"workDoneProgress": True}, 209 | }, 210 | "trace": "verbose", 211 | "workspaceFolders": [{"uri": lsp_client.PROJECT_URI, "name": "textLSP"}], 212 | "initializationOptions": { 213 | }, 214 | } 215 | -------------------------------------------------------------------------------- /tests/lsp_test_client/lsp_run.py: -------------------------------------------------------------------------------- 1 | """Run Language Server for Test.""" 2 | 3 | import sys 4 | 5 | from textLSP.cli import main 6 | 7 | sys.exit(main()) 8 | -------------------------------------------------------------------------------- /tests/lsp_test_client/session.py: -------------------------------------------------------------------------------- 1 | """Provides LSP session helpers for testing.""" 2 | 3 | import os 4 | import subprocess 5 | import sys 6 | from concurrent.futures import Future, ThreadPoolExecutor 7 | from threading import Event 8 | 9 | from pylsp_jsonrpc.dispatchers import MethodDispatcher 10 | from pylsp_jsonrpc.endpoint import Endpoint 11 | from pylsp_jsonrpc.streams import JsonRpcStreamReader, JsonRpcStreamWriter 12 | 13 | from tests.lsp_test_client import defaults 14 | 15 | LSP_EXIT_TIMEOUT = 5000 16 | 17 | 18 | PUBLISH_DIAGNOSTICS = "textDocument/publishDiagnostics" 19 | WINDOW_LOG_MESSAGE = "window/logMessage" 20 | WINDOW_SHOW_MESSAGE = "window/showMessage" 21 | WINDOW_WORK_DONE_PROGRESS_CREATE = "window/workDoneProgress/create" 22 | 23 | # pylint: disable=no-member 24 | 25 | 26 | class LspSession(MethodDispatcher): 27 | """Send and Receive messages over LSP as a test LS Client.""" 28 | 29 | def __init__(self, cwd=None): 30 | self.cwd = cwd if cwd else os.getcwd() 31 | # pylint: disable=consider-using-with 32 | self._thread_pool = ThreadPoolExecutor() 33 | self._sub = None 34 | self._writer = None 35 | self._reader = None 36 | self._endpoint = None 37 | self._notification_callbacks = {} 38 | 39 | def __enter__(self): 40 | """Context manager entrypoint. 41 | 42 | shell=True needed for pytest-cov to work in subprocess. 
43 | """ 44 | # pylint: disable=consider-using-with 45 | self._sub = subprocess.Popen( 46 | [ 47 | sys.executable, 48 | os.path.join(os.path.dirname(__file__), "lsp_run.py"), 49 | ], 50 | stdout=subprocess.PIPE, 51 | stdin=subprocess.PIPE, 52 | bufsize=0, 53 | cwd=self.cwd, 54 | env=os.environ, 55 | shell="WITH_COVERAGE" in os.environ, 56 | ) 57 | 58 | self._writer = JsonRpcStreamWriter( 59 | os.fdopen(self._sub.stdin.fileno(), "wb") 60 | ) 61 | self._reader = JsonRpcStreamReader( 62 | os.fdopen(self._sub.stdout.fileno(), "rb") 63 | ) 64 | 65 | dispatcher = { 66 | PUBLISH_DIAGNOSTICS: self._publish_diagnostics, 67 | WINDOW_SHOW_MESSAGE: self._window_show_message, 68 | WINDOW_LOG_MESSAGE: self._window_log_message, 69 | WINDOW_WORK_DONE_PROGRESS_CREATE: self._window_work_done_progress_create, 70 | } 71 | self._endpoint = Endpoint(dispatcher, self._writer.write) 72 | self._thread_pool.submit(self._reader.listen, self._endpoint.consume) 73 | return self 74 | 75 | def __exit__(self, typ, value, _tb): 76 | self.shutdown(True) 77 | try: 78 | self._sub.terminate() 79 | except Exception: # pylint:disable=broad-except 80 | pass 81 | self._endpoint.shutdown() 82 | self._thread_pool.shutdown() 83 | 84 | def initialize( 85 | self, 86 | initialize_params=None, 87 | process_server_capabilities=None, 88 | ): 89 | """Sends the initialize request to LSP server.""" 90 | server_initialized = Event() 91 | 92 | def _after_initialize(fut): 93 | if process_server_capabilities: 94 | process_server_capabilities(fut.result()) 95 | self.initialized() 96 | server_initialized.set() 97 | 98 | self._send_request( 99 | "initialize", 100 | params=( 101 | initialize_params 102 | if initialize_params is not None 103 | else defaults.VSCODE_DEFAULT_INITIALIZE 104 | ), 105 | handle_response=_after_initialize, 106 | ) 107 | 108 | server_initialized.wait() 109 | 110 | def initialized(self, initialized_params=None): 111 | """Sends the initialized notification to LSP server.""" 112 | if initialized_params is None: 113 | initialized_params = {} 114 | self._endpoint.notify("initialized", initialized_params) 115 | 116 | def shutdown(self, should_exit, exit_timeout=LSP_EXIT_TIMEOUT): 117 | """Sends the shutdown request to LSP server.""" 118 | 119 | def _after_shutdown(_): 120 | if should_exit: 121 | self.exit_lsp(exit_timeout) 122 | 123 | self._send_request("shutdown", handle_response=_after_shutdown) 124 | 125 | def exit_lsp(self, exit_timeout=LSP_EXIT_TIMEOUT): 126 | """Handles LSP server process exit.""" 127 | self._endpoint.notify("exit") 128 | assert self._sub.wait(exit_timeout) == 0 129 | 130 | def text_document_completion(self, completion_params): 131 | """Sends text document completion request to LSP server.""" 132 | fut = self._send_request( 133 | "textDocument/completion", params=completion_params 134 | ) 135 | return fut.result() 136 | 137 | def text_document_rename(self, rename_params): 138 | """Sends text document rename request to LSP server.""" 139 | fut = self._send_request("textDocument/rename", params=rename_params) 140 | return fut.result() 141 | 142 | def text_document_code_action(self, code_action_params): 143 | """Sends text document code action request to LSP server.""" 144 | fut = self._send_request( 145 | "textDocument/codeAction", params=code_action_params 146 | ) 147 | return fut.result() 148 | 149 | def text_document_hover(self, hover_params): 150 | """Sends text document hover request to LSP server.""" 151 | fut = self._send_request("textDocument/hover", params=hover_params) 152 | return fut.result() 153 | 
154 | def text_document_signature_help(self, signature_help_params):
155 | """Sends text document signature help request to LSP server."""
156 | fut = self._send_request(
157 | "textDocument/signatureHelp", params=signature_help_params
158 | )
159 | return fut.result()
160 | 
161 | def text_document_definition(self, definition_params):
162 | """Sends text document definition request to LSP server."""
163 | fut = self._send_request(
164 | "textDocument/definition", params=definition_params
165 | )
166 | return fut.result()
167 | 
168 | def text_document_symbol(self, document_symbol_params):
169 | """Sends text document symbol request to LSP server."""
170 | fut = self._send_request(
171 | "textDocument/documentSymbol", params=document_symbol_params
172 | )
173 | return fut.result()
174 | 
175 | def text_document_highlight(self, document_highlight_params):
176 | """Sends text document highlight request to LSP server."""
177 | fut = self._send_request(
178 | "textDocument/documentHighlight", params=document_highlight_params
179 | )
180 | return fut.result()
181 | 
182 | def text_document_references(self, references_params):
183 | """Sends text document references request to LSP server."""
184 | fut = self._send_request(
185 | "textDocument/references", params=references_params
186 | )
187 | return fut.result()
188 | 
189 | def workspace_symbol(self, workspace_symbol_params):
190 | """Sends workspace symbol request to LSP server."""
191 | fut = self._send_request(
192 | "workspace/symbol", params=workspace_symbol_params
193 | )
194 | return fut.result()
195 | 
196 | def completion_item_resolve(self, resolve_params):
197 | """Sends completion item resolve request to LSP server."""
198 | fut = self._send_request(
199 | "completionItem/resolve", params=resolve_params
200 | )
201 | return fut.result()
202 | 
203 | def notify_did_change(self, did_change_params):
204 | """Sends did change notification to LSP server."""
205 | self._send_notification(
206 | "textDocument/didChange", params=did_change_params
207 | )
208 | 
209 | def notify_did_save(self, did_save_params):
210 | """Sends did save notification to LSP server."""
211 | self._send_notification("textDocument/didSave", params=did_save_params)
212 | 
213 | def notify_did_open(self, did_open_params):
214 | """Sends did open notification to LSP server."""
215 | self._send_notification("textDocument/didOpen", params=did_open_params)
216 | 
217 | def set_notification_callback(self, notification_name, callback):
218 | """Set custom LS notification handler."""
219 | self._notification_callbacks[notification_name] = callback
220 | 
221 | def get_notification_callback(self, notification_name):
222 | """Gets the callback if set, otherwise a default no-op handler, for a
223 | given LS notification."""
224 | try:
225 | return self._notification_callbacks[notification_name]
226 | except KeyError:
227 | 
228 | def _default_handler(_params):
229 | """Default notification handler."""
230 | 
231 | return _default_handler
232 | 
233 | def _publish_diagnostics(self, publish_diagnostics_params):
234 | """Internal handler for text document publish diagnostics."""
235 | return self._handle_notification(
236 | PUBLISH_DIAGNOSTICS, publish_diagnostics_params
237 | )
238 | 
239 | def _window_log_message(self, window_log_message_params):
240 | """Internal handler for window log message."""
241 | return self._handle_notification(
242 | WINDOW_LOG_MESSAGE, window_log_message_params
243 | )
244 | 
245 | def _window_show_message(self, window_show_message_params):
246 | """Internal handler for window show message."""
247 | return self._handle_notification( 248 | WINDOW_SHOW_MESSAGE, window_show_message_params 249 | ) 250 | 251 | def _window_work_done_progress_create(self, window_progress_params): 252 | """Internal handler for window/workDoneProgress/create""" 253 | return self._handle_notification( 254 | WINDOW_WORK_DONE_PROGRESS_CREATE, window_progress_params 255 | ) 256 | 257 | def _handle_notification(self, notification_name, params): 258 | """Internal handler for notifications.""" 259 | fut = Future() 260 | 261 | def _handler(): 262 | callback = self.get_notification_callback(notification_name) 263 | callback(params) 264 | fut.set_result(None) 265 | 266 | self._thread_pool.submit(_handler) 267 | return fut 268 | 269 | def _send_request( 270 | self, name, params=None, handle_response=lambda f: f.done() 271 | ): 272 | """Sends {name} request to the LSP server.""" 273 | fut = self._endpoint.request(name, params) 274 | fut.add_done_callback(handle_response) 275 | return fut 276 | 277 | def _send_notification(self, name, params=None): 278 | """Sends {name} notification to the LSP server.""" 279 | self._endpoint.notify(name, params) 280 | -------------------------------------------------------------------------------- /tests/lsp_test_client/utils.py: -------------------------------------------------------------------------------- 1 | """Provides LSP client side utilities for easier testing.""" 2 | 3 | import pathlib 4 | import platform 5 | import functools 6 | 7 | import py 8 | 9 | # pylint: disable=no-member 10 | 11 | 12 | def normalizecase(path: str) -> str: 13 | """Fixes 'file' uri or path case for easier testing in windows.""" 14 | if platform.system() == "Windows": 15 | return path.lower() 16 | return path 17 | 18 | 19 | def as_uri(path: py.path.local) -> str: 20 | """Return 'file' uri as string.""" 21 | return normalizecase(pathlib.Path(path).as_uri()) 22 | 23 | 24 | def handle_notification(params, event, results=None): 25 | if results is not None: 26 | results.append(params) 27 | event.set() 28 | 29 | 30 | def get_notification_handler(*args, **kwargs): 31 | return functools.partial(handle_notification, *args, **kwargs) 32 | -------------------------------------------------------------------------------- /tests/server_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pytest 3 | 4 | from multiprocessing import Process 5 | 6 | from textLSP import cli 7 | from textLSP.server import SERVER 8 | 9 | 10 | sys_argv_0 = sys.argv[0] 11 | 12 | 13 | @pytest.mark.parametrize('args', [ 14 | [sys_argv_0, '-a', '127.0.0.1', '-p', '9999'], 15 | [sys_argv_0], 16 | ]) 17 | def test_cli(args): 18 | sys.argv = args 19 | p = Process(target=cli.main) 20 | p.start() 21 | p.join(1) 22 | SERVER.shutdown() 23 | p.join(1) 24 | p.kill() 25 | -------------------------------------------------------------------------------- /tests/utils_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from textLSP import utils, types 4 | 5 | 6 | @pytest.mark.parametrize('src,exp,max,min', [ 7 | ( 8 | 'This is a sentence of 47 characters. ' 9 | 'This is a sentence of 47 characters. ' 10 | 'This is a sentence of 47 characters.', 11 | [ 12 | 'This is a sentence of 47 characters. ', 13 | 'This is a sentence of 47 characters. ', 14 | 'This is a sentence of 47 characters.', 15 | ], 16 | 47, 17 | 0, 18 | ), 19 | ( 20 | 'This is a sentence of 47 characters. ' 21 | 'This is a sentence of 47 characters. 
' 22 | 'This is a sentence of 47 characters.', 23 | [ 24 | 'This is a sentence of 47 characters. ' 25 | 'This is a sentence of 47 characters. ' 26 | 'This is a sentence of 47 characters.', 27 | ], 28 | 3*47, 29 | 0, 30 | ), 31 | ( 32 | 'This is a sentence of 47 characters.\n' 33 | 'This is a sentence of 47 characters.\n' 34 | 'This is a sentence of 47 characters.', 35 | [ 36 | 'This is a sentence of 47 characters.\n', 37 | 'This is a sentence of 47 characters.\n', 38 | 'This is a sentence of 47 characters.', 39 | ], 40 | 47, 41 | 0, 42 | ), 43 | ( 44 | 'This is a sentence of 47 characters. ' 45 | 'This is a sentence of 47 characters.', 46 | [ 47 | 'This is a sentence of 47 ', 48 | 'characters. ', 49 | 'This is a sentence of 47 ', 50 | 'characters.', 51 | ], 52 | 25, 53 | 0, 54 | ), 55 | ( 56 | 'This is a sentence of 47 characters. ' 57 | 'This is a sentence of 47 characters.', 58 | [ 59 | 'This is a sentence of 47 ', 60 | 'characters. This is a sen', 61 | 'tence of 47 characters.', 62 | ], 63 | 25, 64 | 15, 65 | ), 66 | ( 67 | 'This is a. sentence of 48 characters. ' 68 | 'This is a. sentence of 48 characters. ' 69 | 'This is a sentence of 47 characters.', 70 | [ 71 | 'This is a. sentence of 48 characters. ', 72 | 'This is a. sentence of 48 characters. ', 73 | 'This is a sentence of 47 characters.', 74 | ], 75 | 48, 76 | 0, 77 | ), 78 | ]) 79 | def test_batch_text(src, exp, max, min): 80 | res = list(utils.batch_text(src, types.TEXT_PASSAGE_PATTERN, max, min)) 81 | 82 | assert res == exp 83 | 84 | 85 | @pytest.mark.parametrize('s1,s2,exp', [ 86 | ( 87 | 'This is a sentence of 47 characters. ', 88 | 'This is a sentence of 48 characters. ', 89 | [ 90 | types.TokenDiff( 91 | types.TokenDiff.REPLACE, 92 | '47', 93 | '48', 94 | 22, 95 | 2 96 | ), 97 | ], 98 | ), 99 | ( 100 | 'This is a sentence of 47 characters. ', 101 | 'That is a sentence of 47 characters. ', 102 | [ 103 | types.TokenDiff( 104 | types.TokenDiff.REPLACE, 105 | 'This', 106 | 'That', 107 | 0, 108 | 4 109 | ), 110 | ], 111 | ), 112 | ( 113 | 'This is a sentence of 47 characters. ', 114 | 'This example is a sentence of 47 characters. ', 115 | [ 116 | types.TokenDiff( 117 | types.TokenDiff.INSERT, 118 | '', 119 | ' example', 120 | 4, 121 | 0 122 | ), 123 | ], 124 | ), 125 | ( 126 | 'This example is a sentence of 47 characters. ', 127 | 'This is a sentence of 47 characters. ', 128 | [ 129 | types.TokenDiff( 130 | types.TokenDiff.DELETE, 131 | ' example', 132 | '', 133 | 4, 134 | 8 135 | ), 136 | ], 137 | ), 138 | ( 139 | 'This example is a sentence of 47 characters. ', 140 | 'This is a good sentence of 48 characters. ', 141 | [ 142 | types.TokenDiff( 143 | types.TokenDiff.DELETE, 144 | ' example', 145 | '', 146 | 4, 147 | 8 148 | ), 149 | types.TokenDiff( 150 | types.TokenDiff.INSERT, 151 | '', 152 | 'good ', # XXX: the position of space seems to be a bit inconsistent, before or after 153 | 18, 154 | 0 155 | ), 156 | types.TokenDiff( 157 | types.TokenDiff.REPLACE, 158 | '47', 159 | '48', 160 | 30, 161 | 2 162 | ), 163 | ], 164 | ), 165 | ( 166 | 'This is a sentence of 47 characters. ', 167 | 'This is a sentence of 47 characters. 
', 168 | [], 169 | ), 170 | ( 171 | 'This is a sentence.\n' 172 | '\n' 173 | 'This is a new paragraph.\n', 174 | 'This is a sentence.\n' 175 | '\n' 176 | 'This is the new paragraph.\n', 177 | [ 178 | types.TokenDiff( 179 | types.TokenDiff.REPLACE, 180 | 'a', 181 | 'the', 182 | 29, 183 | 1 184 | ), 185 | ], 186 | ), 187 | ( 188 | 'This is a sentence.\n' 189 | '\n' 190 | 'This is a new paragraph.\n', 191 | 'This is a sentence.\n' 192 | '\n' 193 | 'That this is a new paragraph.\n', 194 | [ 195 | types.TokenDiff( 196 | types.TokenDiff.REPLACE, 197 | 'This', 198 | 'That this', 199 | 21, 200 | 4 201 | ), 202 | ], 203 | ), 204 | ]) 205 | def test_token_diff(s1, s2, exp): 206 | res = types.TokenDiff.token_level_diff(s1, s2) 207 | 208 | assert res == exp 209 | -------------------------------------------------------------------------------- /textLSP/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangyav/textLSP/d23c638521847a0b9a6b9b864df2ccec14f1856e/textLSP/__init__.py -------------------------------------------------------------------------------- /textLSP/analysers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangyav/textLSP/d23c638521847a0b9a6b9b864df2ccec14f1856e/textLSP/analysers/__init__.py -------------------------------------------------------------------------------- /textLSP/analysers/gramformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .gramformer import GramformerAnalyser 2 | -------------------------------------------------------------------------------- /textLSP/analysers/gramformer/gramformer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from re import Match 4 | from itertools import chain 5 | from typing import List, Tuple 6 | from gramformer import Gramformer 7 | from lsprotocol.types import ( 8 | Diagnostic, 9 | Range, 10 | Position, 11 | TextEdit, 12 | CodeAction, 13 | ) 14 | from pygls.server import LanguageServer 15 | 16 | from ..analyser import Analyser 17 | from ...types import Interval, TEXT_PASSAGE_PATTERN 18 | from ...documents.document import BaseDocument 19 | 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | class GramformerAnalyser(Analyser): 25 | CONFIGURATION_GPU = 'gpu' 26 | 27 | SETTINGS_DEFAULT_GPU = False 28 | 29 | def __init__(self, language_server: LanguageServer, config: dict, name: str): 30 | super().__init__(language_server, config, name) 31 | try: 32 | # This could take some time the first time to download models. 
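# NOTE: the delay presumably comes from Gramformer fetching its seq2seq
# correction model (from the HuggingFace hub) on first use; the OSError
# branch below additionally downloads the spaCy 'en' model that
# Gramformer depends on.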
33 | self.analyser = Gramformer( 34 | models=1, # 1=corrector, 2=detector 35 | use_gpu=self.config.get(self.CONFIGURATION_GPU, self.SETTINGS_DEFAULT_GPU), 36 | ) 37 | except OSError: 38 | from spacy.cli import download 39 | download('en') 40 | 41 | self.analyser = Gramformer( 42 | models=1, # 1=corrector, 2=detector 43 | use_gpu=self.config.get(self.CONFIGURATION_GPU, self.SETTINGS_DEFAULT_GPU), 44 | ) 45 | 46 | def _analyse_sentences(self, text, doc, offset=0) -> Tuple[List[Diagnostic], List[CodeAction]]: 47 | diagnostics = list() 48 | code_actions = list() 49 | 50 | sidx = 0 51 | for match in chain(TEXT_PASSAGE_PATTERN.finditer(text), [len(text)]): 52 | if type(match) == Match: 53 | eidx = match.end() 54 | else: 55 | eidx = match 56 | if sidx == eidx: 57 | continue 58 | 59 | sentence = text[sidx:eidx] 60 | diags, actions = self._analyse(sentence, doc, sidx+offset) 61 | diagnostics.extend(diags) 62 | code_actions.extend(actions) 63 | 64 | sidx = eidx 65 | 66 | return diagnostics, code_actions 67 | 68 | def _analyse(self, text, doc, offset=0) -> Tuple[List[Diagnostic], List[CodeAction]]: 69 | text = text.strip() 70 | if len(text) == 0: 71 | return [], [] 72 | 73 | diagnostics = list() 74 | code_actions = list() 75 | 76 | corrected = self.analyser.correct(text, max_candidates=1) 77 | if len(corrected) > 0: 78 | edits = self.analyser.get_edits(text, corrected.pop()) 79 | tokenized_text = text.split(' ') 80 | 81 | for edit in edits: 82 | # edit = (ERROR_CODE, WORD_OLD, OLD_START_POS, OLD_END_POS, WORD_NEW, NEW_START_POS, NEW_END_POS) 83 | token = edit[1] 84 | start_pos = 0 85 | if edit[2] > 0: 86 | start_pos = len(' '.join(tokenized_text[:edit[2]])) + 1 87 | end_pos = len(' '.join(tokenized_text[:edit[3]])) 88 | 89 | range = doc.range_at_offset( 90 | start_pos+offset, 91 | end_pos-start_pos, 92 | True 93 | ) 94 | range = Range( 95 | start=range.start, 96 | end=Position( 97 | line=range.end.line, 98 | character=range.end.character+1, 99 | ) 100 | ) 101 | if len(token) > 0 and len(edit[4]) > 0: 102 | message = f'"{token}": use "{edit[4]}" instead' 103 | elif len(token) > 0: 104 | message = f'"{token}": remove' 105 | else: 106 | message = f'insert "{edit[4]}"' 107 | 108 | diagnostic = Diagnostic( 109 | range=range, 110 | message=message, 111 | source='gramformer', 112 | severity=self.get_severity(), 113 | code=f'gramformer:{edit[0]}', 114 | ) 115 | action = self.build_single_suggestion_action( 116 | doc=doc, 117 | title=f'"{token}" -> "{edit[4]}"', 118 | edit=TextEdit( 119 | range=diagnostic.range, 120 | new_text=edit[4], 121 | ), 122 | diagnostic=diagnostic, 123 | ) 124 | code_actions.append(action) 125 | diagnostics.append(diagnostic) 126 | 127 | return diagnostics, code_actions 128 | 129 | def _did_open(self, doc: BaseDocument): 130 | diagnostics, actions = self._analyse_sentences(doc.cleaned_source, doc) 131 | self.add_diagnostics(doc, diagnostics) 132 | self.add_code_actions(doc, actions) 133 | 134 | def _did_change(self, doc: BaseDocument, changes: List[Interval]): 135 | diagnostics = list() 136 | code_actions = list() 137 | checked = set() 138 | for change in changes: 139 | paragraph = doc.paragraph_at_offset( 140 | change.start, 141 | min_offset=change.start + change.length-1, 142 | cleaned=True, 143 | ) 144 | if paragraph in checked: 145 | continue 146 | 147 | pos_range = doc.range_at_offset( 148 | paragraph.start, 149 | paragraph.length, 150 | True 151 | ) 152 | self.remove_code_items_at_range(doc, pos_range) 153 | 154 | diags, actions = self._analyse_sentences( 155 | 
doc.text_at_offset( 156 | paragraph.start, 157 | paragraph.length, 158 | True 159 | ), 160 | doc, 161 | paragraph.start, 162 | ) 163 | 164 | diagnostics.extend([ 165 | diag 166 | for diag in diags 167 | if diag.range.start >= pos_range.start 168 | ]) 169 | code_actions.extend([ 170 | action 171 | for action in actions 172 | if action.edit.document_changes[0].edits[0].range.start >= pos_range.start 173 | ]) 174 | 175 | checked.add(paragraph) 176 | self.add_diagnostics(doc, diagnostics) 177 | self.add_code_actions(doc, code_actions) 178 | -------------------------------------------------------------------------------- /textLSP/analysers/grammarbot/__init__.py: -------------------------------------------------------------------------------- 1 | from .grammarbot import GrammarBotAnalyser 2 | -------------------------------------------------------------------------------- /textLSP/analysers/grammarbot/grammarbot.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import requests 3 | import urllib.parse 4 | import bisect 5 | 6 | from typing import List 7 | from lsprotocol.types import ( 8 | Diagnostic, 9 | TextEdit, 10 | ) 11 | from pygls.server import LanguageServer 12 | 13 | from ..analyser import Analyser, AnalysisError 14 | from ...documents.document import BaseDocument 15 | from ...utils import batch_text 16 | from ...types import ConfigurationError, TEXT_PASSAGE_PATTERN, Interval 17 | 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | class GrammarBotAnalyser(Analyser): 23 | CONFIGURATION_API_KEY = 'api_key' 24 | CONFIGURATION_INPUT_MAX_REQUESTS = 'input_max_requests' 25 | CONFIGURATION_REQUESTS_OVERFLOW = 'requests_overflow' 26 | 27 | SETTINGS_DEFAULT_CHECK_ON = { 28 | Analyser.CONFIGURATION_CHECK_ON_OPEN: False, 29 | Analyser.CONFIGURATION_CHECK_ON_CHANGE: False, 30 | Analyser.CONFIGURATION_CHECK_ON_SAVE: False, 31 | } 32 | 33 | URL = "https://grammarbot.p.rapidapi.com/check" 34 | CHARACTER_LIMIT_MAX = 8000 35 | CHARACTER_LIMIT_MIN = 7500 36 | INPUT_MAX_REQUESTS = 10 37 | 38 | def __init__(self, language_server: LanguageServer, config: dict, name: str): 39 | super().__init__(language_server, config, name) 40 | # TODO save this somewhere 41 | self._remaining_requests = None 42 | if GrammarBotAnalyser.CONFIGURATION_API_KEY not in self.config: 43 | raise ConfigurationError('Required parameter: grammarbot.api_key') 44 | self._headers = { 45 | 'content-type': 'application/x-www-form-urlencoded', 46 | 'X-RapidAPI-Key': self.config[GrammarBotAnalyser.CONFIGURATION_API_KEY], 47 | 'X-RapidAPI-Host': 'grammarbot.p.rapidapi.com' 48 | } 49 | 50 | def _handle_analyses(self, doc: BaseDocument, analyses, text_sections=None): 51 | diagnostics = list() 52 | code_actions = list() 53 | source = doc.cleaned_source 54 | text_ends = None 55 | if text_sections is not None: 56 | text_ends = [section[1] for section in text_sections] 57 | 58 | for match in analyses: 59 | offset = match['offset'] 60 | length = match['length'] 61 | if text_ends is not None: 62 | idx = bisect.bisect_left(text_ends, offset) 63 | if idx == 0: 64 | offset = text_sections[idx][0] + offset 65 | else: 66 | offset = text_sections[idx][0] + offset - text_ends[idx-1] 67 | 68 | token = source[offset:offset+length] 69 | diagnostic = Diagnostic( 70 | range=doc.range_at_offset(offset, length+1, True), 71 | message=f'"{token}": {match["message"]}', 72 | source='grammarbot', 73 | severity=self.get_severity(), 74 | code=f'grammarbot:{match["rule"]["id"]}', 75 | ) 76 | 
diagnostics.append(diagnostic) 77 | if len(match['replacements']) > 0: 78 | for item in match['replacements']: 79 | replacement = item['value'] 80 | action = self.build_single_suggestion_action( 81 | doc=doc, 82 | title=f'"{token}" -> "{replacement}"', 83 | edit=TextEdit( 84 | range=diagnostic.range, 85 | new_text=replacement, 86 | ), 87 | diagnostic=diagnostic, 88 | ) 89 | code_actions.append(action) 90 | 91 | return diagnostics, code_actions 92 | 93 | def _did_open(self, doc: BaseDocument): 94 | diagnostics, code_actions = self._handle_analyses( 95 | doc, 96 | self._analyse_text(doc.cleaned_source) 97 | ) 98 | self.add_diagnostics(doc, diagnostics) 99 | self.add_code_actions(doc, code_actions) 100 | 101 | def _did_change(self, doc: BaseDocument, changes: List[Interval]): 102 | text = '' 103 | # (in_text_start_offset, in_analysis_text_end_offset_inclusive) 104 | text_sections = list() 105 | checked = set() 106 | 107 | for change in changes: 108 | paragraph = doc.paragraph_at_offset( 109 | change.start, 110 | min_offset=change.start + change.length-1, 111 | cleaned=True, 112 | ) 113 | if paragraph in checked: 114 | continue 115 | checked.add(paragraph) 116 | 117 | pos_range = doc.range_at_offset( 118 | paragraph.start, 119 | paragraph.length, 120 | True 121 | ) 122 | self.remove_code_items_at_range(doc, pos_range) 123 | 124 | paragraph_text = doc.text_at_offset(paragraph.start, paragraph.length) 125 | text += paragraph_text 126 | text += '\n' 127 | text_sections.append((paragraph.start, len(text))) 128 | 129 | diagnostics, code_actions = self._handle_analyses( 130 | doc, 131 | self._analyse_text(text), 132 | text_sections 133 | ) 134 | 135 | self.add_diagnostics(doc, diagnostics) 136 | self.add_code_actions(doc, code_actions) 137 | 138 | def _analyse_text(self, text): 139 | spans = list(batch_text( 140 | text, 141 | TEXT_PASSAGE_PATTERN, 142 | GrammarBotAnalyser.CHARACTER_LIMIT_MAX, 143 | GrammarBotAnalyser.CHARACTER_LIMIT_MIN, 144 | )) 145 | limit = self.config.setdefault( 146 | GrammarBotAnalyser.CONFIGURATION_INPUT_MAX_REQUESTS, 147 | GrammarBotAnalyser.INPUT_MAX_REQUESTS 148 | ) 149 | if len(spans) > limit: 150 | # Safety measure 151 | raise AnalysisError(f'Too large input. 
Size: {len(spans)}, max: {limit}') 152 | 153 | offset = 0 154 | for span in spans: 155 | for item in self._analyse_api_call(span): 156 | item['offset'] += offset 157 | yield item 158 | 159 | offset += len(span) 160 | 161 | def _analyse_api_call(self, text): 162 | if self._remaining_requests is not None: 163 | overflow = self.config.setdefault( 164 | GrammarBotAnalyser.CONFIGURATION_REQUESTS_OVERFLOW, 165 | 0 166 | ) 167 | if self._remaining_requests + overflow <= 0: 168 | raise AnalysisError('Requests quota reached.') 169 | 170 | urltext = urllib.parse.quote(text) 171 | payload = f'text={urltext}&language=en-US' 172 | 173 | response = requests.request( 174 | "POST", 175 | GrammarBotAnalyser.URL, 176 | data=payload, 177 | headers=self._headers 178 | ) 179 | data = response.json() 180 | 181 | if 'matches' not in data: 182 | if 'message' in data: 183 | raise AnalysisError(data['message']) 184 | if 'error' in data: 185 | raise AnalysisError(data['error']) 186 | 187 | self._remaining_requests = int( 188 | response.headers['X-RateLimit-Requests-Remaining'] 189 | ) 190 | 191 | for match in data['matches']: 192 | yield match 193 | -------------------------------------------------------------------------------- /textLSP/analysers/handler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | 4 | from typing import List, Optional 5 | from lsprotocol.types import MessageType 6 | from lsprotocol.types import ( 7 | DidOpenTextDocumentParams, 8 | DidChangeTextDocumentParams, 9 | DidCloseTextDocumentParams, 10 | DidSaveTextDocumentParams, 11 | TextDocumentContentChangeEvent, 12 | CodeActionParams, 13 | CodeAction, 14 | CompletionParams, 15 | CompletionList, 16 | ) 17 | from pygls.workspace import Document 18 | 19 | from .. 
import analysers 20 | from .analyser import Analyser, AnalysisError 21 | from ..utils import get_class 22 | from ..types import ConfigurationError, ProgressBar 23 | 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class AnalyserHandler(): 29 | 30 | def __init__(self, language_server, settings=None): 31 | self.language_server = language_server 32 | self.analysers = dict() 33 | self.update_settings(settings) 34 | 35 | def update_settings(self, settings): 36 | if settings is None: 37 | return 38 | 39 | old_analysers = self.analysers 40 | self.analysers = dict() 41 | for name, config in settings.items(): 42 | if not config.setdefault('enabled', False): 43 | continue 44 | if name in old_analysers: 45 | analyser = old_analysers[name] 46 | analyser.update_settings(config) 47 | self.analysers[name] = analyser 48 | else: 49 | try: 50 | with ProgressBar(self.language_server, f'{name} init'): 51 | cls = get_class( 52 | '{}.{}'.format(analysers.__name__, name), 53 | Analyser, 54 | ) 55 | self.analysers[name] = cls( 56 | self.language_server, 57 | config, 58 | name 59 | ) 60 | except ImportError as e: 61 | self.language_server.show_message( 62 | f"Error ({name}): {str(e)}", 63 | MessageType.Error, 64 | ) 65 | except ConfigurationError as e: 66 | self.language_server.show_message( 67 | f"Error ({name}): {str(e)}", 68 | MessageType.Error, 69 | ) 70 | 71 | for name, analyser in old_analysers.items(): 72 | if name not in self.analysers: 73 | analyser.close() 74 | 75 | def shutdown(self): 76 | for analyser in self.analysers.values(): 77 | analyser.close() 78 | 79 | def get_diagnostics(self, doc: Document): 80 | try: 81 | return [ 82 | analyser.get_diagnostics(doc) 83 | for analyser in self.analysers.values() 84 | ] 85 | except Exception as e: 86 | self.language_server.show_message( 87 | str('Server error. See log for details.'), 88 | MessageType.Error, 89 | ) 90 | logger.exception(str(e)) 91 | return [] 92 | 93 | def get_code_actions(self, params: CodeActionParams) -> Optional[List[CodeAction]]: 94 | res = list() 95 | try: 96 | for analyser in self.analysers.values(): 97 | tmp_lst = analyser.get_code_actions(params) 98 | if tmp_lst is not None and len(tmp_lst) > 0: 99 | res.extend(tmp_lst) 100 | except Exception as e: 101 | self.language_server.show_message( 102 | str('Server error. See log for details.'), 103 | MessageType.Error, 104 | ) 105 | logger.exception(str(e)) 106 | 107 | return res if len(res) > 0 else None 108 | 109 | async def _submit_task(self, function, *args, **kwargs): 110 | functions = list() 111 | for name, analyser in self.analysers.items(): 112 | functions.append( 113 | self.language_server.loop.create_task( 114 | function(name, analyser, *args, **kwargs) 115 | ) 116 | ) 117 | 118 | if len(functions) == 0: 119 | return 120 | 121 | done, pending = await asyncio.wait(functions) 122 | for task in done: 123 | try: 124 | task.result() 125 | except Exception as e: 126 | self.language_server.show_message( 127 | str('Server error. 
See log for details.'), 128 | MessageType.Error, 129 | ) 130 | logger.exception(str(e)) 131 | 132 | async def _did_open( 133 | self, 134 | analyser_name: str, 135 | analyser: Analyser, 136 | params: DidOpenTextDocumentParams, 137 | ): 138 | try: 139 | analyser.did_open( 140 | params, 141 | ) 142 | except AnalysisError as e: 143 | self.language_server.show_message( 144 | str(f'{analyser_name}: {e}'), 145 | MessageType.Error, 146 | ) 147 | 148 | async def did_open(self, params: DidOpenTextDocumentParams): 149 | await self._submit_task( 150 | self._did_open, 151 | params=params 152 | ) 153 | 154 | async def _did_change( 155 | self, 156 | analyser_name: str, 157 | analyser: Analyser, 158 | params: DidChangeTextDocumentParams, 159 | ): 160 | try: 161 | analyser.did_change( 162 | params, 163 | ) 164 | except AnalysisError as e: 165 | self.language_server.show_message( 166 | str(f'{analyser_name}: {e}'), 167 | MessageType.Error, 168 | ) 169 | 170 | async def did_change(self, params: DidChangeTextDocumentParams): 171 | await self._submit_task( 172 | self._did_change, 173 | params=params 174 | ) 175 | 176 | async def _did_save( 177 | self, 178 | analyser_name: str, 179 | analyser: Analyser, 180 | params: DidSaveTextDocumentParams, 181 | ): 182 | try: 183 | analyser.did_save( 184 | params, 185 | ) 186 | except AnalysisError as e: 187 | self.language_server.show_message( 188 | str(f'{analyser_name}: {e}'), 189 | MessageType.Error, 190 | ) 191 | 192 | async def did_save(self, params: DidSaveTextDocumentParams): 193 | await self._submit_task( 194 | self._did_save, 195 | params=params 196 | ) 197 | 198 | async def _did_close( 199 | self, 200 | analyser_name: str, 201 | analyser: Analyser, 202 | params: DidCloseTextDocumentParams 203 | ): 204 | analyser.did_close( 205 | params, 206 | ) 207 | 208 | async def did_close(self, params: DidCloseTextDocumentParams): 209 | await self._submit_task( 210 | self._did_close, 211 | params=params 212 | ) 213 | 214 | async def _command_analyse( 215 | self, 216 | analyser_name: str, 217 | analyser: Analyser, 218 | args, 219 | ): 220 | try: 221 | analyser.command_analyse(*args) 222 | except AnalysisError as e: 223 | self.language_server.show_message( 224 | str(f'{analyser_name}: {e}'), 225 | MessageType.Error, 226 | ) 227 | 228 | async def command_analyse(self, *args): 229 | args = args[0] 230 | if 'analyser' in args[0]: 231 | analyser_name = args[0].pop('analyser') 232 | analyser = self.analysers[analyser_name] 233 | try: 234 | analyser.command_analyse(*args) 235 | except AnalysisError as e: 236 | self.language_server.show_message( 237 | str(f'{analyser_name}: {e}'), 238 | MessageType.Error, 239 | ) 240 | except Exception as e: 241 | self.language_server.show_message( 242 | str('Server error. See log for details.'), 243 | MessageType.Error, 244 | ) 245 | logger.exception(str(e)) 246 | else: 247 | await self._submit_task(self._command_analyse, args) 248 | 249 | async def command_custom_command(self, *args): 250 | args = args[0][0] 251 | assert 'analyser' in args 252 | analyser = self.analysers[args.pop('analyser')] 253 | command = args.pop('command') 254 | ext_command = f'command_{command}' 255 | 256 | if hasattr(analyser, ext_command): 257 | try: 258 | getattr(analyser, ext_command)(**args) 259 | except Exception as e: 260 | self.language_server.show_message( 261 | str('Server error. 
See log for details.'), 262 | MessageType.Error, 263 | ) 264 | logger.exception(str(e)) 265 | else: 266 | self.language_server.show_message( 267 | str(f'No custom command supported by {analyser}: {command}'), 268 | MessageType.Error, 269 | ) 270 | 271 | def update_document(self, doc: Document, change: TextDocumentContentChangeEvent): 272 | for name, analyser in self.analysers.items(): 273 | analyser.update_document(doc, change) 274 | 275 | def get_completions(self, params: Optional[CompletionParams] = None) -> CompletionList: 276 | comp_lst = list() 277 | try: 278 | for _, analyser in self.analysers.items(): 279 | tmp = analyser.get_completions(params) 280 | if tmp is not None and len(tmp) > 0: 281 | comp_lst.extend(tmp) 282 | except Exception as e: 283 | self.language_server.show_message( 284 | str('Server error. See log for details.'), 285 | MessageType.Error, 286 | ) 287 | logger.exception(str(e)) 288 | 289 | return CompletionList( 290 | is_incomplete=False, 291 | items=comp_lst, 292 | ) 293 | -------------------------------------------------------------------------------- /textLSP/analysers/hf_checker/__init__.py: -------------------------------------------------------------------------------- 1 | from .hf_checker import HFCheckerAnalyser 2 | -------------------------------------------------------------------------------- /textLSP/analysers/hf_checker/hf_checker.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from re import Match 4 | from itertools import chain 5 | from typing import List, Tuple 6 | from lsprotocol.types import ( 7 | Diagnostic, 8 | Range, 9 | Position, 10 | TextEdit, 11 | CodeAction, 12 | MessageType, 13 | ) 14 | from pygls.server import LanguageServer 15 | from transformers import pipeline 16 | 17 | from ..analyser import Analyser 18 | from ...types import ( 19 | Interval, 20 | LINE_PATTERN, 21 | TokenDiff, 22 | ConfigurationError, 23 | ) 24 | from ...documents.document import BaseDocument 25 | from ... 
import nn_utils
26 | 
27 | 
28 | logger = logging.getLogger(__name__)
29 | 
30 | 
31 | class HFCheckerAnalyser(Analyser):
32 | CONFIGURATION_GPU = 'gpu'
33 | CONFIGURATION_MODEL = 'model'
34 | CONFIGURATION_MIN_LENGTH = 'min_length'
35 | CONFIGURATION_QUANTIZE = 'quantize'
36 | 
37 | SETTINGS_DEFAULT_GPU = False
38 | SETTINGS_DEFAULT_MODEL = 'grammarly/coedit-large'
39 | SETTINGS_DEFAULT_MIN_LENGTH = 0
40 | SETTINGS_DEFAULT_QUANTIZE = 32
41 | 
42 | def __init__(self, language_server: LanguageServer, config: dict, name: str):
43 | super().__init__(language_server, config, name)
44 | use_gpu = self.config.get(self.CONFIGURATION_GPU, self.SETTINGS_DEFAULT_GPU)
45 | device = nn_utils.get_device(use_gpu)
46 | 
47 | quantize = self.config.setdefault(self.CONFIGURATION_QUANTIZE, self.SETTINGS_DEFAULT_QUANTIZE)
48 | model_kwargs = dict()
49 | try:
50 | nn_utils.set_quantization_args(quantize, device, model_kwargs)
51 | except ConfigurationError as e:
52 | language_server.show_message(
53 | f'{self.name}: {str(e)}',
54 | MessageType.Error,
55 | )
56 | self.config[self.CONFIGURATION_QUANTIZE] = 32
57 | 
58 | model = self.config.get(self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL)
59 | self._corrector = pipeline(
60 | 'text2text-generation',
61 | model,
62 | device=device,
63 | model_kwargs=model_kwargs,
64 | )
65 | 
66 | def corrector(self, text):
67 | return self._corrector(text)
68 | 
69 | def _analyse_lines(self, text, doc, offset=0) -> Tuple[List[Diagnostic], List[CodeAction]]:
70 | diagnostics = list()
71 | code_actions = list()
72 | 
73 | sidx = 0
74 | for match in chain(LINE_PATTERN.finditer(text), [len(text)]):
75 | if type(match) == Match:
76 | eidx = match.end()
77 | else:
78 | eidx = match
79 | if sidx == eidx:
80 | continue
81 | 
82 | line = text[sidx:eidx]
83 | diags, actions = self._analyse(line, doc, sidx+offset)
84 | diagnostics.extend(diags)
85 | code_actions.extend(actions)
86 | 
87 | sidx = eidx
88 | 
89 | return diagnostics, code_actions
90 | 
91 | def _analyse(self, text, doc, offset=0) -> Tuple[List[Diagnostic], List[CodeAction]]:
92 | text = text.strip()
93 | if len(text) < self.config.get(self.CONFIGURATION_MIN_LENGTH, self.SETTINGS_DEFAULT_MIN_LENGTH):
94 | return [], []
95 | 
96 | diagnostics = list()
97 | code_actions = list()
98 | 
99 | corrected = self.corrector(text)
100 | if len(corrected) == 0:
101 | return [], []
102 | corrected = corrected.pop(0)['generated_text']
103 | 
104 | edits = TokenDiff.token_level_diff(text, corrected.strip())
105 | for edit in edits:
106 | if edit.type == TokenDiff.INSERT:
107 | if edit.offset >= len(text):
108 | edit.new_token = f' {edit.new_token}'
109 | else:
110 | edit.new_token = f' {edit.new_token} '
111 | edit.old_token = ' '
112 | edit.offset -= 1
113 | edit.length += 1
114 | 
115 | token = edit.old_token
116 | 
117 | if edit.offset+offset >= len(doc.cleaned_source):
118 | edit.offset -= 1
119 | range = doc.range_at_offset(edit.offset+offset, edit.length, True)
120 | range = Range(
121 | start=range.start,
122 | end=Position(
123 | line=range.end.line,
124 | character=range.end.character+1,
125 | )
126 | )
127 | 
128 | if edit.type == TokenDiff.INSERT:
129 | message = f'insert "{edit.new_token}"'
130 | elif edit.type == TokenDiff.REPLACE:
131 | message = f'"{token}": use "{edit.new_token}" instead'
132 | else:
133 | message = f'"{token}": remove'
134 | diagnostic = Diagnostic(
135 | range=range,
136 | message=message,
137 | source='hf_checker',
138 | severity=self.get_severity(),
139 | code=f'hf_checker:{edit.type}',
140 | )
141 | action = 
self.build_single_suggestion_action( 142 | doc=doc, 143 | title=f'"{token}" -> "{edit.new_token}"', 144 | edit=TextEdit( 145 | range=diagnostic.range, 146 | new_text=edit.new_token, 147 | ), 148 | diagnostic=diagnostic, 149 | ) 150 | code_actions.append(action) 151 | diagnostics.append(diagnostic) 152 | 153 | return diagnostics, code_actions 154 | 155 | def _did_open(self, doc: BaseDocument): 156 | diagnostics, actions = self._analyse_lines(doc.cleaned_source, doc) 157 | self.add_diagnostics(doc, diagnostics) 158 | self.add_code_actions(doc, actions) 159 | 160 | def _did_change(self, doc: BaseDocument, changes: List[Interval]): 161 | diagnostics = list() 162 | code_actions = list() 163 | checked = set() 164 | for change in changes: 165 | paragraph = doc.paragraph_at_offset( 166 | change.start, 167 | min_offset=change.start + change.length-1, 168 | cleaned=True, 169 | ) 170 | if paragraph in checked: 171 | continue 172 | 173 | pos_range = doc.range_at_offset( 174 | paragraph.start, 175 | paragraph.length, 176 | True 177 | ) 178 | self.remove_code_items_at_range(doc, pos_range) 179 | 180 | diags, actions = self._analyse_lines( 181 | doc.text_at_offset( 182 | paragraph.start, 183 | paragraph.length, 184 | True 185 | ), 186 | doc, 187 | paragraph.start, 188 | ) 189 | 190 | diagnostics.extend([ 191 | diag 192 | for diag in diags 193 | if diag.range.start >= pos_range.start 194 | ]) 195 | code_actions.extend([ 196 | action 197 | for action in actions 198 | if action.edit.document_changes[0].edits[0].range.start >= pos_range.start 199 | ]) 200 | 201 | checked.add(paragraph) 202 | self.add_diagnostics(doc, diagnostics) 203 | self.add_code_actions(doc, code_actions) 204 | -------------------------------------------------------------------------------- /textLSP/analysers/hf_completion/__init__.py: -------------------------------------------------------------------------------- 1 | from .hf_completion import HFCompletionAnalyser 2 | -------------------------------------------------------------------------------- /textLSP/analysers/hf_completion/hf_completion.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import Optional, List 4 | from lsprotocol.types import ( 5 | CompletionParams, 6 | CompletionItem, 7 | CompletionList, 8 | CodeActionParams, 9 | CodeAction, 10 | ) 11 | from pygls.server import LanguageServer 12 | from lsprotocol.types import MessageType 13 | from transformers import pipeline 14 | 15 | from ..analyser import Analyser 16 | from ...types import ConfigurationError 17 | from ... 
import nn_utils
18 | 
19 | 
20 | logger = logging.getLogger(__name__)
21 | 
22 | 
23 | class HFCompletionAnalyser(Analyser):
24 | CONFIGURATION_GPU = 'gpu'
25 | CONFIGURATION_MODEL = 'model'
26 | CONFIGURATION_TOP_K = 'topk'
27 | CONFIGURATION_CONTEXT_SIZE = 'context_size'
28 | CONFIGURATION_QUANTIZE = 'quantize'
29 | 
30 | SETTINGS_DEFAULT_GPU = False
31 | SETTINGS_DEFAULT_MODEL = 'bert-base-multilingual-cased'
32 | SETTINGS_DEFAULT_TOP_K = 5
33 | SETTINGS_DEFAULT_CONTEXT_SIZE = 50
34 | SETTINGS_DEFAULT_QUANTIZE = 32
35 | 
36 | def __init__(self, language_server: LanguageServer, config: dict, name: str):
37 | super().__init__(language_server, config, name)
38 | use_gpu = self.config.get(self.CONFIGURATION_GPU, self.SETTINGS_DEFAULT_GPU)
39 | device = nn_utils.get_device(use_gpu)
40 | 
41 | quantize = self.config.setdefault(self.CONFIGURATION_QUANTIZE, self.SETTINGS_DEFAULT_QUANTIZE)
42 | model_kwargs = dict()
43 | try:
44 | nn_utils.set_quantization_args(quantize, device, model_kwargs)
45 | except ConfigurationError as e:
46 | language_server.show_message(
47 | f'{self.name}: {str(e)}',
48 | MessageType.Error,
49 | )
50 | self.config[self.CONFIGURATION_QUANTIZE] = 32
51 | 
52 | model = self.config.get(self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL)
53 | self.completor = pipeline(
54 | 'fill-mask',
55 | model,
56 | device=device,
57 | model_kwargs=model_kwargs,
58 | )
59 | if self.completor.tokenizer.mask_token is None:
60 | raise ConfigurationError(f'The tokenizer of {model} does not have a MASK token.')
61 | 
62 | def should_run_on(self, event: str) -> bool:
63 | return False
64 | 
65 | def get_code_actions(self, params: CodeActionParams) -> Optional[List[CodeAction]]:
66 | return None
67 | 
68 | def get_completions(self, params: Optional[CompletionParams] = None) -> Optional[CompletionList]:
69 | doc = self.get_document(params)
70 | doc_len = len(doc.cleaned_source)
71 | offset = doc.offset_at_position(params.position, True)
72 | offset = max(0, min(offset, doc_len-1))
73 | in_paragraph_offset = self.config.get(self.CONFIGURATION_CONTEXT_SIZE, self.SETTINGS_DEFAULT_CONTEXT_SIZE)
74 | start = max(0, offset-in_paragraph_offset)
75 | length = min(doc_len-offset+in_paragraph_offset, 2*in_paragraph_offset)
76 | in_paragraph_offset = offset-start-1 # we need the character before the position
77 | if in_paragraph_offset >= length:
78 | return None
79 | 
80 | paragraph = doc.cleaned_source[start:start+length]
81 | # we look for whitespace in the uncleaned source since some positions
82 | # in the file might not be mapped to the cleaned_source which leads to
83 | # unexpected behaviour
84 | uncleaned_offset = max(0, doc.offset_at_position(params.position)-1)
85 | # XXX: this still gets activated in e.g. 
commented lines 86 | if doc.source[uncleaned_offset] in {' ', '\n'}: 87 | return self._get_text_completions(paragraph, in_paragraph_offset) 88 | 89 | def _get_text_completions(self, paragraph, in_paragraph_offset): 90 | input = '' 91 | if in_paragraph_offset > 0: 92 | input += paragraph[:in_paragraph_offset+1].strip(' ') 93 | if input[-1] != '\n': 94 | input += ' ' 95 | input += f'{self.completor.tokenizer.mask_token} ' 96 | if in_paragraph_offset < len(paragraph) - 1: 97 | input += paragraph[in_paragraph_offset+1:].strip() 98 | 99 | res = list() 100 | for item in self.completor( 101 | input, 102 | top_k=self.config.get(self.CONFIGURATION_TOP_K, self.SETTINGS_DEFAULT_TOP_K), 103 | ): 104 | completion_item = CompletionItem( 105 | label=item['token_str'], 106 | ) 107 | res.append(completion_item) 108 | return res 109 | -------------------------------------------------------------------------------- /textLSP/analysers/hf_instruction_checker/__init__.py: -------------------------------------------------------------------------------- 1 | from .hf_instruction_checker import HFInstructionCheckerAnalyser 2 | -------------------------------------------------------------------------------- /textLSP/analysers/hf_instruction_checker/hf_instruction_checker.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import Optional, List 4 | from pygls.server import LanguageServer 5 | from lsprotocol.types import ( 6 | Diagnostic, 7 | Range, 8 | Position, 9 | TextEdit, 10 | CodeAction, 11 | WorkspaceEdit, 12 | Command, 13 | CodeActionParams, 14 | TextDocumentEdit, 15 | VersionedTextDocumentIdentifier, 16 | CompletionParams, 17 | CompletionList, 18 | CompletionItem, 19 | MessageType, 20 | ) 21 | 22 | from ..hf_checker import HFCheckerAnalyser 23 | from ...types import ProgressBar, Interval 24 | 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class HFInstructionCheckerAnalyser(HFCheckerAnalyser): 30 | CONFIGURATION_INSTRUCTION = 'instruction' 31 | CONFIGURATION_PROMPT_MAGIC = 'prompt_magic' 32 | 33 | SETTINGS_DEFAULT_INSTRUCTION = 'Fix the grammar:' 34 | SETTINGS_DEFAULT_PROMPT_MAGIC = '%HF% ' 35 | 36 | def __init__(self, language_server: LanguageServer, config: dict, name: str): 37 | super().__init__(language_server, config, name) 38 | 39 | instruction = self.config.get(self.CONFIGURATION_INSTRUCTION, self.SETTINGS_DEFAULT_INSTRUCTION) 40 | if instruction is None: 41 | self.config[self.CONFIGURATION_INSTRUCTION] = '' 42 | 43 | def corrector(self, text): 44 | instruction = self.config.get(self.CONFIGURATION_INSTRUCTION, self.SETTINGS_DEFAULT_INSTRUCTION) 45 | inp = f'{instruction} {text}' if len(instruction) > 0 else text 46 | 47 | return self._corrector(inp) 48 | 49 | def get_completions(self, params: Optional[CompletionParams] = None) -> Optional[CompletionList]: 50 | if params.position == Position(line=0, character=0): 51 | return None 52 | 53 | doc = self.get_document(params) 54 | line = doc.lines[params.position.line] 55 | magic = self.config.get(self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC) 56 | 57 | line_prefix = line[:params.position.character].strip() 58 | if len(line_prefix) == 0 or line_prefix in magic: 59 | return [ 60 | CompletionItem( 61 | label=magic, 62 | detail='hf_instruction_checker magic command for text' 63 | ' generation based on the prompt that follows.' 
64 | )
65 | ]
66 | 
67 | def command_generate(
68 | self,
69 | uri: str,
70 | interval: str,
71 | ):
72 | with ProgressBar(
73 | self.language_server,
74 | f'{self.name} generating',
75 | token=self._progressbar_token
76 | ):
77 | magic = self.config.get(self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC)
78 | doc = self.get_document(uri)
79 | interval = Interval(**eval(interval))
80 | range = doc.range_at_offset(interval.start, interval.length, False)
81 | lines = doc.lines[range.start.line:range.end.line+1]
82 | lines[0] = lines[0][lines[0].find(magic)+len(magic):]
83 | prompt = '\n'.join(lines)
84 | 
85 | new_text = self._corrector(prompt)
86 | if len(new_text) == 0:
87 | return
88 | new_text = new_text.pop(0)['generated_text']
89 | new_text += '\n'
90 | 
91 | edit = WorkspaceEdit(
92 | document_changes=[
93 | TextDocumentEdit(
94 | text_document=VersionedTextDocumentIdentifier(
95 | uri=doc.uri,
96 | version=doc.version,
97 | ),
98 | edits=[
99 | TextEdit(
100 | range=range,
101 | new_text=new_text,
102 | ),
103 | 
104 | ]
105 | )
106 | ]
107 | )
108 | self.language_server.apply_edit(edit, 'textlsp.hf_instruction_checker.generate')
109 | 
110 | def get_code_actions(self, params: CodeActionParams) -> Optional[List[CodeAction]]:
111 | doc = self.get_document(params)
112 | res = super().get_code_actions(params)
113 | 
114 | if len(doc.lines) > 0:
115 | line = doc.lines[params.range.start.line].strip()
116 | else:
117 | line = ''
118 | magic = self.config.get(self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC)
119 | if magic in line:
120 | if res is None:
121 | res = list()
122 | 
123 | start_offset = doc.offset_at_position(params.range.start)
124 | end_offset = doc.offset_at_position(params.range.end)
125 | paragraphs = doc.paragraphs_at_offset(
126 | start_offset,
127 | min_offset=end_offset,
128 | cleaned=False
129 | )
130 | 
131 | if doc.position_at_offset(paragraphs[0].start, False).line != params.range.start.line:
132 | # only if prompt is the first line of the paragraph
133 | return res
134 | 
135 | start_offset = paragraphs[0].start
136 | end_offset = paragraphs[-1].start+paragraphs[-1].length
137 | interval = Interval(start_offset, end_offset)
138 | interval = str({
139 | 'start': start_offset,
140 | 'length': end_offset-start_offset+1,
141 | })
142 | title = 'Prompt HF'
143 | res.append(
144 | self.build_command_action(
145 | doc=doc,
146 | title=title,
147 | command=Command(
148 | title=title,
149 | command=self.language_server.COMMAND_CUSTOM,
150 | arguments=[{
151 | 'command': 'generate',
152 | 'analyser': self.name,
153 | 'uri': doc.uri,
154 | 'interval': interval,
155 | }],
156 | ),
157 | )
158 | )
159 | 
160 | return res
161 | 
-------------------------------------------------------------------------------- /textLSP/analysers/languagetool/__init__.py: --------------------------------------------------------------------------------
1 | from .languagetool import LanguageToolAnalyser
2 | 
-------------------------------------------------------------------------------- /textLSP/analysers/languagetool/languagetool.py: --------------------------------------------------------------------------------
1 | import logging
2 | 
3 | from typing import List, Tuple
4 | from language_tool_python import LanguageTool
5 | from lsprotocol.types import (
6 | Diagnostic,
7 | Range,
8 | Position,
9 | TextEdit,
10 | CodeAction,
11 | MessageType,
12 | )
13 | from pygls.server import LanguageServer
14 | 
15 | from ..analyser import Analyser
16 | from ...types import Interval
17 | from 
...documents.document import BaseDocument 18 | 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | LANGUAGE_MAP = dict() 24 | LANGUAGE_MAP['en'] = 'en-US' 25 | 26 | DEFAULT_LANGUAGE = 'en' 27 | 28 | 29 | class LanguageToolAnalyser(Analyser): 30 | def __init__(self, language_server: LanguageServer, config: dict, name: str): 31 | super().__init__(language_server, config, name) 32 | self.tools = dict() 33 | self._tool_backoff = dict() 34 | 35 | def _analyse(self, text, doc, offset=0) -> Tuple[List[Diagnostic], List[CodeAction]]: 36 | diagnostics = list() 37 | code_actions = list() 38 | matches = self._get_tool_for_language(doc.language).check(text) 39 | 40 | for match in matches: 41 | token = text[match.offset:match.offset+match.errorLength] 42 | 43 | range = doc.range_at_offset(match.offset+offset, match.errorLength, True) 44 | range = Range( 45 | start=range.start, 46 | end=Position( 47 | line=range.end.line, 48 | character=range.end.character+1, 49 | ) 50 | ) 51 | diagnostic = Diagnostic( 52 | range=range, 53 | message=f'"{token}": {match.message}', 54 | source='languagetool', 55 | severity=self.get_severity(), 56 | code=f'languagetool:{match.ruleId}', 57 | ) 58 | if len(match.replacements) > 0: 59 | for replacement in match.replacements: 60 | action = self.build_single_suggestion_action( 61 | doc=doc, 62 | title=f'"{token}" -> "{replacement}"', 63 | edit=TextEdit( 64 | range=diagnostic.range, 65 | new_text=replacement, 66 | ), 67 | diagnostic=diagnostic, 68 | ) 69 | code_actions.append(action) 70 | diagnostics.append(diagnostic) 71 | 72 | return diagnostics, code_actions 73 | 74 | def _did_open(self, doc: BaseDocument): 75 | diagnostics, actions = self._analyse(doc.cleaned_source, doc) 76 | self.add_diagnostics(doc, diagnostics) 77 | self.add_code_actions(doc, actions) 78 | 79 | def _did_change(self, doc: BaseDocument, changes: List[Interval]): 80 | diagnostics = list() 81 | code_actions = list() 82 | checked = set() 83 | doc_length = len(doc.cleaned_source) 84 | for change in changes: 85 | paragraph = doc.paragraph_at_offset( 86 | change.start, 87 | min_offset=change.start + change.length-1, 88 | cleaned=True, 89 | ) 90 | if paragraph in checked: 91 | continue 92 | 93 | # get sentences before paragraph for context check 94 | n = 2 95 | min_sent_len = 4 96 | start_sent = paragraph 97 | while n > 0: 98 | pos = start_sent.start - 1 - min_sent_len 99 | if pos < 0: 100 | break 101 | 102 | start_sent = doc.sentence_at_offset( 103 | pos, 104 | min_length=min_sent_len, 105 | cleaned=True 106 | ) 107 | if len(doc.text_at_offset(start_sent.start, start_sent.length, True).strip()) > 0: 108 | n -= 1 109 | 110 | # get sentences after paragraph for context check 111 | n = 2 112 | end_sent = paragraph 113 | while n > 0: 114 | pos = end_sent.start + end_sent.length 115 | if pos >= doc_length: 116 | break 117 | 118 | end_sent = doc.sentence_at_offset( 119 | pos, 120 | min_length=min_sent_len, 121 | cleaned=True 122 | ) 123 | if len(doc.text_at_offset(end_sent.start, end_sent.length, True).strip()) > 0: 124 | n -= 1 125 | ################################################################### 126 | 127 | pos_range = doc.range_at_offset( 128 | paragraph.start, 129 | end_sent.start-paragraph.start-1 + end_sent.length, 130 | True 131 | ) 132 | self.remove_code_items_at_range(doc, pos_range) 133 | 134 | diags, actions = self._analyse( 135 | doc.text_at_offset( 136 | start_sent.start, 137 | end_sent.start-start_sent.start-1 + end_sent.length, 138 | True 139 | ), 140 | doc, 141 | start_sent.start, 142 | ) 
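# NOTE: _analyse() above receives up to two sentences of context before
# and after the edited paragraph; the filters below keep only diagnostics
# and code actions that start inside the re-checked range (pos_range), so
# matches that start in the leading context sentences are not duplicated.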
143 | 
144 | diagnostics.extend([
145 | diag
146 | for diag in diags
147 | if diag.range.start >= pos_range.start
148 | ])
149 | code_actions.extend([
150 | action
151 | for action in actions
152 | if action.edit.document_changes[0].edits[0].range.start >= pos_range.start
153 | ])
154 | 
155 | checked.add(paragraph)
156 | self.add_diagnostics(doc, diagnostics)
157 | self.add_code_actions(doc, code_actions)
158 | 
159 | def _did_close(self, doc: BaseDocument):
160 | workspace = self.language_server.workspace
161 | doc_langs = {
162 | document.language
163 | for _, document in workspace.documents.items()
164 | }
165 | tool_langs = set(self.tools.keys())
166 | 
167 | for lang in tool_langs - doc_langs:
168 | if any(
169 | lang2 in doc_langs
170 | for lang2, backoff in self._tool_backoff.items()
171 | if backoff == lang
172 | ):
173 | # do not close a language that is still used by other languages as backoff
174 | # XXX: not the most efficient but assuming there's not a lot of backed-off
175 | # languages around, this should be fast
176 | continue
177 | 
178 | 
179 | if lang in self.tools:
180 | self.tools[lang].close()
181 | del self.tools[lang]
182 | 
183 | def close(self):
184 | for lang, tool in self.tools.items():
185 | tool.close()
186 | self.tools = dict()
187 | 
188 | def __del__(self):
189 | self.close()
190 | 
191 | def _get_mapped_language(self, language):
192 | return LANGUAGE_MAP.get(language, language)
193 | 
194 | def _get_tool_for_language(self, language):
195 | lang = self._get_mapped_language(language)
196 | if lang in self.tools:
197 | return self.tools[lang]
198 | if lang in self._tool_backoff and self._tool_backoff[lang] in self.tools:
199 | return self.tools[self._tool_backoff[lang]]
200 | 
201 | try:
202 | tool = LanguageTool(lang)
203 | self.tools[lang] = tool
204 | except ValueError:
205 | self.language_server.show_message(
206 | f'{self.name}: unsupported language: {lang}! Using {DEFAULT_LANGUAGE}',
207 | MessageType.Error,
208 | )
209 | 
210 | if lang == DEFAULT_LANGUAGE:
211 | raise ValueError("We shouldn't get here")
212 | 
213 | tool = self._get_tool_for_language(DEFAULT_LANGUAGE)
214 | self._tool_backoff[lang] = DEFAULT_LANGUAGE
215 | 
216 | return tool
217 | 
-------------------------------------------------------------------------------- /textLSP/analysers/ollama/__init__.py: --------------------------------------------------------------------------------
1 | from .ollama import OllamaAnalyser
2 | 
-------------------------------------------------------------------------------- /textLSP/analysers/ollama/ollama.py: --------------------------------------------------------------------------------
1 | import logging
2 | from typing import List, Optional, Tuple
3 | 
4 | from lsprotocol.types import (
5 | CodeAction,
6 | CodeActionParams,
7 | Command,
8 | CompletionItem,
9 | CompletionList,
10 | CompletionParams,
11 | Diagnostic,
12 | MessageType,
13 | Position,
14 | Range,
15 | TextDocumentEdit,
16 | TextEdit,
17 | VersionedTextDocumentIdentifier,
18 | WorkspaceEdit,
19 | )
20 | from pygls.server import LanguageServer
21 | 
22 | import ollama
23 | 
24 | from ...documents.document import BaseDocument
25 | from ...types import ConfigurationError, Interval, ProgressBar, TokenDiff
26 | from ..analyser import Analyser
27 | 
28 | logger = logging.getLogger(__name__)
29 | 
30 | 
31 | class OllamaAnalyser(Analyser):
32 | CONFIGURATION_MODEL = "model"
33 | CONFIGURATION_KEEP_ALIVE = "keep_alive"
34 | CONFIGURATION_EDIT_INSTRUCTION = "edit_instruction"
35 | CONFIGURATION_TEMPERATURE = "temperature"
36 | CONFIGURATION_MAX_TOKEN = "max_token"
37 | CONFIGURATION_PROMPT_MAGIC = "prompt_magic"
38 | 
39 | SETTINGS_DEFAULT_MODEL = "phi3:14b-instruct"
40 | SETTINGS_DEFAULT_KEEP_ALIVE = "10m"
41 | SETTINGS_DEFAULT_EDIT_INSTRUCTION = (
42 | "Fix spelling and grammar errors of the"
43 | " input sentence. Print only"
44 | " the corrected sentence even if it is correct. Input: "
45 | )
46 | SETTINGS_DEFAULT_TEMPERATURE = 0
47 | SETTINGS_DEFAULT_MAX_TOKEN = 50
48 | SETTINGS_DEFAULT_PROMPT_MAGIC = "%OLLAMA% "
49 | SETTINGS_DEFAULT_CHECK_ON = {
50 | Analyser.CONFIGURATION_CHECK_ON_OPEN: False,
51 | Analyser.CONFIGURATION_CHECK_ON_CHANGE: False,
52 | Analyser.CONFIGURATION_CHECK_ON_SAVE: True,
53 | }
54 | 
55 | def __init__(self, language_server: LanguageServer, config: dict, name: str):
56 | super().__init__(language_server, config, name)
57 | 
58 | try:
59 | # test if the server is running
60 | ollama.list()
61 | except ConnectionError:
62 | raise ConfigurationError(
63 | "Ollama server is not running. Start it manually and restart textLSP. "
64 | "To install Ollama see: https://ollama.com/download" 65 | ) 66 | 67 | try: 68 | # test if the model is available 69 | ollama.show( 70 | self.config.get(self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL), 71 | ) 72 | except ollama.ResponseError: 73 | try: 74 | with ProgressBar( 75 | self.language_server, 76 | f"{self.name} downloading {self.config.get(self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL)}", 77 | token=self._progressbar_token, 78 | ): 79 | ollama.pull( 80 | self.config.get( 81 | self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL 82 | ) 83 | ) 84 | except Exception as e: 85 | logger.exception(e, stack_info=True) 86 | raise ConfigurationError(f"{self.name}: {e}") 87 | 88 | def _generate(self, prompt, options=None, keep_alive=None): 89 | logger.debug(f"Generating for input: {prompt}") 90 | if options is None: 91 | options = { 92 | "seed": 42, 93 | "temperature": self.config.get( 94 | self.CONFIGURATION_TEMPERATURE, self.SETTINGS_DEFAULT_TEMPERATURE 95 | ), 96 | } 97 | if keep_alive is None: 98 | keep_alive = self.config.get( 99 | self.CONFIGURATION_KEEP_ALIVE, self.SETTINGS_DEFAULT_KEEP_ALIVE 100 | ) 101 | try: 102 | res = ollama.generate( 103 | model=self.config.get( 104 | self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL 105 | ), 106 | prompt=prompt, 107 | options=options, 108 | keep_alive=keep_alive, 109 | ) 110 | logger.debug(f"Generation output: {res}") 111 | except ollama.ResponseError as e: 112 | self.language_server.show_message( 113 | str(e), 114 | MessageType.Error, 115 | ) 116 | return None 117 | 118 | return res 119 | 120 | def _analyse( 121 | self, text, doc, offset=0 122 | ) -> Tuple[List[Diagnostic], List[CodeAction]]: 123 | diagnostics = list() 124 | code_actions = list() 125 | 126 | # we don not want trailing whitespace 127 | text = text.rstrip() 128 | 129 | res = self._generate( 130 | prompt=f"{self.config.get(self.CONFIGURATION_EDIT_INSTRUCTION, self.SETTINGS_DEFAULT_EDIT_INSTRUCTION)}{text}", 131 | ) 132 | if res is None: 133 | return [], [] 134 | 135 | edits = TokenDiff.token_level_diff(text, res["response"].strip()) 136 | 137 | for edit in edits: 138 | if edit.type == TokenDiff.INSERT: 139 | if edit.offset >= len(text): 140 | edit.new_token = f" {edit.new_token}" 141 | else: 142 | edit.new_token = f" {edit.new_token} " 143 | edit.old_token = " " 144 | edit.offset -= 1 145 | edit.length += 1 146 | 147 | token = edit.old_token 148 | 149 | range = doc.range_at_offset(edit.offset + offset, edit.length, True) 150 | range = Range( 151 | start=range.start, 152 | end=Position( 153 | line=range.end.line, 154 | character=range.end.character + 1, 155 | ), 156 | ) 157 | 158 | if edit.type == TokenDiff.INSERT: 159 | message = f'insert "{edit.new_token}"' 160 | elif edit.type == TokenDiff.REPLACE: 161 | message = f'"{token}": use "{edit.new_token}" instead' 162 | else: 163 | message = f'"{token}": remove' 164 | 165 | diagnostic = Diagnostic( 166 | range=range, 167 | message=message, 168 | source="ollama", 169 | severity=self.get_severity(), 170 | code=f"ollama:{edit.type}", 171 | ) 172 | action = self.build_single_suggestion_action( 173 | doc=doc, 174 | title=f'"{token}" -> "{edit.new_token}"', 175 | edit=TextEdit( 176 | range=diagnostic.range, 177 | new_text=edit.new_token, 178 | ), 179 | diagnostic=diagnostic, 180 | ) 181 | code_actions.append(action) 182 | diagnostics.append(diagnostic) 183 | 184 | return diagnostics, code_actions 185 | 186 | def _did_open(self, doc: BaseDocument): 187 | diagnostics = list() 188 | code_actions = list() 189 | checked 
= set() 190 | for paragraph in doc.paragraphs_at_offset( 191 | 0, len(doc.cleaned_source), cleaned=True 192 | ): 193 | diags, actions = self._handle_paragraph(doc, paragraph) 194 | diagnostics.extend(diags) 195 | code_actions.extend(actions) 196 | checked.add(paragraph) 197 | 198 | self.add_diagnostics(doc, diagnostics) 199 | self.add_code_actions(doc, code_actions) 200 | 201 | def _did_change(self, doc: BaseDocument, changes: List[Interval]): 202 | diagnostics = list() 203 | code_actions = list() 204 | checked = set() 205 | for change in changes: 206 | for paragraph in doc.paragraphs_at_offset( 207 | change.start, 208 | min_offset=change.start + change.length - 1, 209 | cleaned=True, 210 | ): 211 | if paragraph in checked: 212 | continue 213 | 214 | diags, actions = self._handle_paragraph(doc, paragraph) 215 | diagnostics.extend(diags) 216 | code_actions.extend(actions) 217 | checked.add(paragraph) 218 | 219 | self.add_diagnostics(doc, diagnostics) 220 | self.add_code_actions(doc, code_actions) 221 | 222 | def _handle_paragraph(self, doc: BaseDocument, paragraph: Interval): 223 | if ( 224 | len(doc.text_at_offset(paragraph.start, paragraph.length, True).strip()) 225 | == 0 226 | ): 227 | return [], [] 228 | 229 | pos_range = doc.range_at_offset(paragraph.start, paragraph.length, True) 230 | self.remove_code_items_at_range(doc, pos_range) 231 | 232 | diags, actions = self._analyse( 233 | doc.text_at_offset(paragraph.start, paragraph.length, True), 234 | doc, 235 | paragraph.start, 236 | ) 237 | 238 | diagnostics = [diag for diag in diags if diag.range.start >= pos_range.start] 239 | code_actions = [ 240 | action 241 | for action in actions 242 | if action.edit.document_changes[0].edits[0].range.start >= pos_range.start 243 | ] 244 | 245 | return diagnostics, code_actions 246 | 247 | def command_generate(self, uri: str, prompt: str, position: str, new_line=True): 248 | with ProgressBar( 249 | self.language_server, 250 | f"{self.name} generating", 251 | token=self._progressbar_token, 252 | ): 253 | doc = self.get_document(uri) 254 | 255 | result = self._generate(prompt) 256 | if result is None: 257 | return [], [] 258 | 259 | new_text = f"{result['response'].strip()}\n" 260 | position = Position(**eval(position)) 261 | range = Range( 262 | start=position, 263 | end=position, 264 | ) 265 | 266 | edit = WorkspaceEdit( 267 | document_changes=[ 268 | TextDocumentEdit( 269 | text_document=VersionedTextDocumentIdentifier( 270 | uri=doc.uri, 271 | version=doc.version, 272 | ), 273 | edits=[ 274 | TextEdit( 275 | range=range, 276 | new_text=new_text, 277 | ), 278 | ], 279 | ) 280 | ] 281 | ) 282 | self.language_server.apply_edit(edit, "textlsp.ollama.generate") 283 | 284 | def get_code_actions(self, params: CodeActionParams) -> Optional[List[CodeAction]]: 285 | doc = self.get_document(params) 286 | res = super().get_code_actions(params) 287 | 288 | if len(doc.lines) > 0: 289 | line = doc.lines[params.range.start.line].strip() 290 | else: 291 | line = "" 292 | magic = self.config.get( 293 | self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC 294 | ) 295 | if magic in line: 296 | if res is None: 297 | res = list() 298 | 299 | paragraph = doc.paragraph_at_position(params.range.start, False) 300 | position = doc.position_at_offset(paragraph.start + paragraph.length, False) 301 | position = str({"line": position.line, "character": position.character}) 302 | prompt = doc.text_at_offset(paragraph.start, paragraph.length, False) 303 | prompt = prompt[prompt.find(magic) + len(magic) :] 304 | 
title = "Prompt Ollama" 305 | res.append( 306 | self.build_command_action( 307 | doc=doc, 308 | title=title, 309 | command=Command( 310 | title=title, 311 | command=self.language_server.COMMAND_CUSTOM, 312 | arguments=[ 313 | { 314 | "command": "generate", 315 | "analyser": self.name, 316 | "uri": doc.uri, 317 | "prompt": prompt, 318 | "position": position, 319 | "new_line": True, 320 | } 321 | ], 322 | ), 323 | ) 324 | ) 325 | 326 | return res 327 | 328 | def get_completions( 329 | self, params: Optional[CompletionParams] = None 330 | ) -> Optional[CompletionList]: 331 | if params.position == Position(line=0, character=0): 332 | return None 333 | 334 | doc = self.get_document(params) 335 | line = doc.lines[params.position.line] 336 | magic = self.config.get( 337 | self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC 338 | ) 339 | 340 | line_prefix = line[: params.position.character].strip() 341 | if len(line_prefix) == 0 or line_prefix in magic: 342 | return [ 343 | CompletionItem( 344 | label=magic, 345 | detail="Ollama magic command for text generation based on" 346 | " the prompt that follows.", 347 | ) 348 | ] 349 | -------------------------------------------------------------------------------- /textLSP/analysers/openai/__init__.py: -------------------------------------------------------------------------------- 1 | from .openai import OpenAIAnalyser 2 | -------------------------------------------------------------------------------- /textLSP/analysers/openai/openai.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from openai import OpenAI, APIError 3 | 4 | from typing import List, Tuple, Optional 5 | from lsprotocol.types import ( 6 | Diagnostic, 7 | Range, 8 | Position, 9 | TextEdit, 10 | CodeAction, 11 | WorkspaceEdit, 12 | Command, 13 | CodeActionParams, 14 | TextDocumentEdit, 15 | VersionedTextDocumentIdentifier, 16 | CompletionParams, 17 | CompletionList, 18 | CompletionItem, 19 | MessageType, 20 | ) 21 | from pygls.server import LanguageServer 22 | 23 | from ..analyser import Analyser 24 | from ...types import Interval, ConfigurationError, TokenDiff, ProgressBar 25 | from ...documents.document import BaseDocument 26 | 27 | 28 | logger = logging.getLogger(__name__) 29 | 30 | 31 | class OpenAIAnalyser(Analyser): 32 | CONFIGURATION_API_KEY = "api_key" 33 | CONFIGURATION_URL = "url" 34 | CONFIGURATION_MODEL = "model" 35 | CONFIGURATION_EDIT_INSTRUCTION = "edit_instruction" 36 | CONFIGURATION_TEMPERATURE = "temperature" 37 | CONFIGURATION_MAX_TOKEN = "max_token" 38 | CONFIGURATION_PROMPT_MAGIC = "prompt_magic" 39 | 40 | SETTINGS_DEFAULT_URL = None 41 | SETTINGS_DEFAULT_MODEL = "text-babbage-001" 42 | SETTINGS_DEFAULT_EDIT_INSTRUCTION = "Fix spelling and grammar errors." 
43 | SETTINGS_DEFAULT_TEMPERATURE = 0 44 | SETTINGS_DEFAULT_MAX_TOKEN = 16 45 | SETTINGS_DEFAULT_PROMPT_MAGIC = "%OPENAI% " 46 | SETTINGS_DEFAULT_CHECK_ON = { 47 | Analyser.CONFIGURATION_CHECK_ON_OPEN: False, 48 | Analyser.CONFIGURATION_CHECK_ON_CHANGE: False, 49 | Analyser.CONFIGURATION_CHECK_ON_SAVE: False, 50 | } 51 | 52 | def __init__(self, language_server: LanguageServer, config: dict, name: str): 53 | super().__init__(language_server, config, name) 54 | if self.CONFIGURATION_API_KEY not in self.config: 55 | raise ConfigurationError( 56 | f"Required parameter: {name}.{self.CONFIGURATION_API_KEY}" 57 | ) 58 | url = self.config.get(self.CONFIGURATION_URL, self.SETTINGS_DEFAULT_URL) 59 | if url is not None and url.lower() == "none": 60 | url = None 61 | self._client = OpenAI( 62 | api_key=self.config[self.CONFIGURATION_API_KEY], 63 | base_url=url, 64 | ) 65 | 66 | def _chat_endpoint( 67 | self, 68 | system_msg: str, 69 | user_msg: str, 70 | model: str, 71 | temperature: int, 72 | max_tokens: int = None, 73 | ): 74 | assert system_msg is not None or user_msg is not None 75 | 76 | messages = list() 77 | if system_msg is not None: 78 | messages.append({"role": "system", "content": system_msg}) 79 | if user_msg is not None: 80 | messages.append({"role": "user", "content": user_msg}) 81 | 82 | res = self._client.chat.completions.create( 83 | model=model, 84 | messages=messages, 85 | temperature=temperature, 86 | max_tokens=max_tokens, 87 | ) 88 | 89 | return res 90 | 91 | def _edit(self, text) -> List[TokenDiff]: 92 | res = self._chat_endpoint( 93 | system_msg=self.config.get( 94 | self.CONFIGURATION_EDIT_INSTRUCTION, 95 | self.SETTINGS_DEFAULT_EDIT_INSTRUCTION, 96 | ), 97 | user_msg=text, 98 | model=self.config.get( 99 | self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL 100 | ), 101 | temperature=self.config.get( 102 | self.CONFIGURATION_TEMPERATURE, self.SETTINGS_DEFAULT_TEMPERATURE 103 | ), 104 | ) 105 | logger.debug(f"Response: {res}") 106 | 107 | if len(res.choices) > 0: 108 | # the API escapes special characters such as newlines 109 | res_text = ( 110 | res.choices[0].message.content.strip().encode().decode("unicode_escape") 111 | ) 112 | return TokenDiff.token_level_diff(text, res_text) 113 | 114 | return [] 115 | 116 | def _generate(self, text) -> Optional[str]: 117 | res = self._chat_endpoint( 118 | system_msg=text, 119 | user_msg=None, 120 | model=self.config.get( 121 | self.CONFIGURATION_MODEL, self.SETTINGS_DEFAULT_MODEL 122 | ), 123 | temperature=self.config.get( 124 | self.CONFIGURATION_TEMPERATURE, self.SETTINGS_DEFAULT_TEMPERATURE 125 | ), 126 | max_tokens=self.config.get( 127 | self.CONFIGURATION_MAX_TOKEN, self.SETTINGS_DEFAULT_MAX_TOKEN 128 | ), 129 | ) 130 | logger.debug(f"Response: {res}") 131 | 132 | if len(res.choices) > 0: 133 | # the API escapes special characters such as newlines 134 | return ( 135 | res.choices[0].message.content.strip().encode().decode("unicode_escape") 136 | ) 137 | 138 | return None 139 | 140 | def _analyse( 141 | self, text, doc, offset=0 142 | ) -> Tuple[List[Diagnostic], List[CodeAction]]: 143 | diagnostics = list() 144 | code_actions = list() 145 | 146 | # we do not want trailing whitespace 147 | text = text.rstrip() 148 | 149 | try: 150 | edits = self._edit(text) 151 | except APIError as e: 152 | self.language_server.show_message( 153 | str(e), 154 | MessageType.Error, 155 | ) 156 | edits = [] 157 | 158 | for edit in edits: 159 | if edit.type == TokenDiff.INSERT: 160 | if edit.offset >= len(text): 161 | edit.new_token = f" {edit.new_token}" 162 | else: 163 | edit.new_token = f" {edit.new_token} " 164 | edit.old_token = " " 165 | edit.offset -= 1 166 | edit.length += 1 167 | 168 | token = edit.old_token 169 | 170 | range = doc.range_at_offset(edit.offset + offset, edit.length, True) 171 | range = Range( 172 | start=range.start, 173 | end=Position( 174 | line=range.end.line, 175 | character=range.end.character + 1, 176 | ), 177 | ) 178 | 179 | if edit.type == TokenDiff.INSERT: 180 | message = f'insert "{edit.new_token}"' 181 | elif edit.type == TokenDiff.REPLACE: 182 | message = f'"{token}": use "{edit.new_token}" instead' 183 | else: 184 | message = f'"{token}": remove' 185 | 186 | diagnostic = Diagnostic( 187 | range=range, 188 | message=message, 189 | source="openai", 190 | severity=self.get_severity(), 191 | code=f"openai:{edit.type}", 192 | ) 193 | action = self.build_single_suggestion_action( 194 | doc=doc, 195 | title=f'"{token}" -> "{edit.new_token}"', 196 | edit=TextEdit( 197 | range=diagnostic.range, 198 | new_text=edit.new_token, 199 | ), 200 | diagnostic=diagnostic, 201 | ) 202 | code_actions.append(action) 203 | diagnostics.append(diagnostic) 204 | 205 | return diagnostics, code_actions 206 | 207 | def _did_open(self, doc: BaseDocument): 208 | diagnostics = list() 209 | code_actions = list() 210 | checked = set() 211 | for paragraph in doc.paragraphs_at_offset( 212 | 0, len(doc.cleaned_source), cleaned=True 213 | ): 214 | diags, actions = self._handle_paragraph(doc, paragraph) 215 | diagnostics.extend(diags) 216 | code_actions.extend(actions) 217 | checked.add(paragraph) 218 | 219 | self.add_diagnostics(doc, diagnostics) 220 | self.add_code_actions(doc, code_actions) 221 | 222 | def _did_change(self, doc: BaseDocument, changes: List[Interval]): 223 | diagnostics = list() 224 | code_actions = list() 225 | checked = set() 226 | for change in changes: 227 | paragraph = doc.paragraph_at_offset( 228 | change.start, 229 | min_offset=change.start + change.length - 1, 230 | cleaned=True, 231 | ) 232 | if paragraph in checked: 233 | continue 234 | 235 | diags, actions = self._handle_paragraph(doc, paragraph) 236 | diagnostics.extend(diags) 237 | code_actions.extend(actions) 238 | checked.add(paragraph) 239 | 240 | self.add_diagnostics(doc, diagnostics) 241 | self.add_code_actions(doc, code_actions) 242 | 243 | def _handle_paragraph(self, doc: BaseDocument, paragraph: Interval): 244 | if ( 245 | len(doc.text_at_offset(paragraph.start, paragraph.length, True).strip()) 246 | == 0 247 | ): 248 | return [], [] 249 | 250 | pos_range = doc.range_at_offset(paragraph.start, paragraph.length, True) 251 | self.remove_code_items_at_range(doc, pos_range) 252 | 253 | diags, actions = self._analyse( 254 | doc.text_at_offset(paragraph.start, paragraph.length, True), 255 | doc, 256 | paragraph.start, 257 | ) 258 | 259 | diagnostics = [diag for diag in diags if diag.range.start >= pos_range.start] 260 | code_actions = [ 261 | action 262 | for action in actions 263 | if action.edit.document_changes[0].edits[0].range.start >= pos_range.start 264 | ] 265 | 266 | return diagnostics, code_actions 267 | 268 | def command_generate(self, uri: str, prompt: str, position: str, new_line=True): 269 | with ProgressBar( 270 | self.language_server, 271 | f"{self.name} generating", 272 | token=self._progressbar_token, 273 | ): 274 | doc = self.get_document(uri) 275 | 276 | try: 277 | new_text = self._generate(prompt) 278 | except APIError as e: 279 | self.language_server.show_message( 280 | str(e), 281 | MessageType.Error, 282 | ) 283 | 
return 284 | 285 | new_text += "\n" 286 | position = Position(**eval(position)) 287 | range = Range( 288 | start=position, 289 | end=position, 290 | ) 291 | 292 | edit = WorkspaceEdit( 293 | document_changes=[ 294 | TextDocumentEdit( 295 | text_document=VersionedTextDocumentIdentifier( 296 | uri=doc.uri, 297 | version=doc.version, 298 | ), 299 | edits=[ 300 | TextEdit( 301 | range=range, 302 | new_text=new_text, 303 | ), 304 | ], 305 | ) 306 | ] 307 | ) 308 | self.language_server.apply_edit(edit, "textlsp.openai.generate") 309 | 310 | def get_code_actions(self, params: CodeActionParams) -> Optional[List[CodeAction]]: 311 | doc = self.get_document(params) 312 | res = super().get_code_actions(params) 313 | 314 | if len(doc.lines) > 0: 315 | line = doc.lines[params.range.start.line].strip() 316 | else: 317 | line = "" 318 | magic = self.config.get( 319 | self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC 320 | ) 321 | if magic in line: 322 | if res is None: 323 | res = list() 324 | 325 | paragraph = doc.paragraph_at_position(params.range.start, False) 326 | position = doc.position_at_offset(paragraph.start + paragraph.length, False) 327 | position = str({"line": position.line, "character": position.character}) 328 | prompt = doc.text_at_offset(paragraph.start, paragraph.length, False) 329 | prompt = prompt[prompt.find(magic) + len(magic) :] 330 | title = "Prompt OpenAI" 331 | res.append( 332 | self.build_command_action( 333 | doc=doc, 334 | title=title, 335 | command=Command( 336 | title=title, 337 | command=self.language_server.COMMAND_CUSTOM, 338 | arguments=[ 339 | { 340 | "command": "generate", 341 | "analyser": self.name, 342 | "uri": doc.uri, 343 | "prompt": prompt, 344 | "position": position, 345 | "new_line": True, 346 | } 347 | ], 348 | ), 349 | ) 350 | ) 351 | 352 | return res 353 | 354 | def get_completions( 355 | self, params: Optional[CompletionParams] = None 356 | ) -> Optional[CompletionList]: 357 | if params.position == Position(line=0, character=0): 358 | return None 359 | 360 | doc = self.get_document(params) 361 | line = doc.lines[params.position.line] 362 | magic = self.config.get( 363 | self.CONFIGURATION_PROMPT_MAGIC, self.SETTINGS_DEFAULT_PROMPT_MAGIC 364 | ) 365 | 366 | line_prefix = line[: params.position.character].strip() 367 | if len(line_prefix) == 0 or line_prefix in magic: 368 | return [ 369 | CompletionItem( 370 | label=magic, 371 | detail="OpenAI magic command for text generation based on" 372 | " the prompt that follows.", 373 | ) 374 | ] 375 | -------------------------------------------------------------------------------- /textLSP/cli.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import argparse 3 | 4 | from .server import SERVER 5 | 6 | 7 | def getArguments(): 8 | parser = argparse.ArgumentParser() 9 | 10 | parser.add_argument( 11 | '-a', 12 | '--address', 13 | type=str, 14 | help='Listen address.' 15 | ) 16 | parser.add_argument( 17 | '-p', 18 | '--port', 19 | type=int, 20 | help='Listen port.' 
21 | ) 22 | parser.add_argument( 23 | '--log-level', 24 | type=str, 25 | default='WARNING', 26 | choices=list(logging._nameToLevel.keys()) 27 | ) 28 | 29 | return parser.parse_args() 30 | 31 | 32 | def main(): 33 | args = getArguments() 34 | 35 | address = args.address 36 | port = args.port 37 | log_level = args.log_level.upper() 38 | 39 | logging.basicConfig(level=logging._nameToLevel[log_level]) 40 | 41 | if address is not None and port is not None: 42 | SERVER.start_tcp(address, port) 43 | else: 44 | SERVER.start_io() 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 | -------------------------------------------------------------------------------- /textLSP/documents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangyav/textLSP/d23c638521847a0b9a6b9b864df2ccec14f1856e/textLSP/documents/__init__.py -------------------------------------------------------------------------------- /textLSP/documents/latex/__init__.py: -------------------------------------------------------------------------------- 1 | from .latex import LatexDocument 2 | -------------------------------------------------------------------------------- /textLSP/documents/latex/latex.py: -------------------------------------------------------------------------------- 1 | from typing import Generator 2 | from tree_sitter import Tree 3 | 4 | from ..document import TreeSitterDocument, TextNode 5 | 6 | 7 | class LatexDocument(TreeSitterDocument): 8 | TEXT = 'text' 9 | WORD = 'word' 10 | SECTION = 'section' 11 | SUBSECTION = 'subsection' 12 | PARAGRAPH = 'paragraph' 13 | CURLY_GROUP = 'curly_group' 14 | ENUM_ITEM = 'enum_item' 15 | GENERIC_ENVIRONMENT = 'generic_environment' 16 | ERROR = 'ERROR' # content in case of a syntax error, e.g. 
missing closing environment 17 | 18 | NODE_CONTENT = 'content' 19 | NODE_NEWLINE_BEFORE_AFTER = 'newline_before_after' 20 | 21 | TEXT_ROOTS = { 22 | SECTION, 23 | SUBSECTION, 24 | PARAGRAPH, 25 | CURLY_GROUP, 26 | ENUM_ITEM, 27 | GENERIC_ENVIRONMENT, 28 | ERROR, 29 | } 30 | 31 | NEWLINE_BEFORE_AFTER_CURLY_PARENT = { 32 | SECTION, 33 | SUBSECTION, 34 | PARAGRAPH, 35 | } 36 | 37 | NEWLINE_BEFORE_AFTER = { 38 | ENUM_ITEM, 39 | } 40 | 41 | def __init__(self, *args, **kwargs): 42 | super().__init__( 43 | 'latex', 44 | 'https://github.com/latex-lsp/tree-sitter-latex', 45 | 'v0.3.0', 46 | *args, 47 | **kwargs, 48 | ) 49 | 50 | def _build_query(self): 51 | query_str = '' 52 | 53 | for root in self.TEXT_ROOTS: 54 | query_str += f'({root} ({self.TEXT} ({self.WORD}) @{self.NODE_CONTENT}))\n' 55 | 56 | for root in self.NEWLINE_BEFORE_AFTER_CURLY_PARENT: 57 | query_str += f'({root} ({self.CURLY_GROUP}) @{self.NODE_NEWLINE_BEFORE_AFTER})\n' 58 | for root in self.NEWLINE_BEFORE_AFTER: 59 | query_str += f'({root}) @{self.NODE_NEWLINE_BEFORE_AFTER}\n' 60 | 61 | return self._ts_language.query(query_str) 62 | 63 | def _iterate_text_nodes( 64 | self, 65 | tree: Tree, 66 | start_point, 67 | end_point, 68 | ) -> Generator[TextNode, None, None]: 69 | lines = tree.text.decode('utf-8').split('\n') 70 | 71 | last_sent = None 72 | new_lines_after = list() 73 | 74 | for node in self._query.captures(tree.root_node, start_point=start_point, end_point=end_point): 75 | # Check if we need some newlines after previous elements 76 | while len(new_lines_after) > 0: 77 | if node[0].start_point > new_lines_after[0]: 78 | if last_sent is not None: 79 | for nl in TextNode.get_new_lines(2, last_sent.end_point): 80 | last_sent = nl 81 | yield nl 82 | new_lines_after.pop(0) 83 | else: 84 | break 85 | 86 | if node[1] == self.NODE_CONTENT: 87 | # check if we need newlines due to linebreaks in source 88 | if ( 89 | last_sent is not None 90 | and last_sent.text[-1] != '\n' 91 | and node[0].start_point[0] - last_sent.end_point[0] > 1 92 | and '' in lines[last_sent.end_point[0]+1:node[0].start_point[0]] 93 | ): 94 | for nl_node in TextNode.get_new_lines(2, last_sent.end_point): 95 | yield nl_node 96 | last_sent = nl_node 97 | 98 | # handle spaces 99 | if self._needs_space_before(node[0], lines, last_sent): 100 | sp = node[0].start_point 101 | if sp[1] > 0: 102 | yield TextNode.space( 103 | start_point=(sp[0], sp[1]-1), 104 | end_point=(sp[0], sp[1]-1), 105 | ) 106 | else: 107 | yield TextNode.space( 108 | start_point=( 109 | last_sent.end_point[0], 110 | last_sent.end_point[1]+1 111 | ), 112 | end_point=( 113 | last_sent.end_point[0], 114 | last_sent.end_point[1]+1 115 | ), 116 | ) 117 | 118 | ############################################################### 119 | # XXX This is a workaround to handle the issues related to 120 | # commas and hyphens in the TS grammar. See: 121 | # - https://github.com/latex-lsp/tree-sitter-latex/issues/73 122 | # - https://github.com/latex-lsp/tree-sitter-latex/issues/74 123 | # and remove this block when the issues are fixed. 
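# In short: the block below peeks at the character immediately following the captured word; if the grammar split off a trailing comma or hyphen, that character is glued back onto the token so that e.g. "word," stays attached and hyphenated words are not torn apart before analysis.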
124 | line = self.lines[node[0].end_point[0]] 125 | char = None 126 | if node[0].end_point[1] < len(line): 127 | char = line[node[0].end_point[1]] 128 | 129 | if char in {',', '-'}: 130 | last_sent = TextNode( 131 | text=node[0].text.decode('utf-8')+char, 132 | start_point=node[0].start_point, 133 | end_point=(node[0].end_point[0], node[0].end_point[1]) # node.end_point[1]-1+1 134 | ) 135 | else: 136 | ########################################################### 137 | last_sent = TextNode.from_ts_node(node[0]) 138 | yield last_sent 139 | elif node[1] == self.NODE_NEWLINE_BEFORE_AFTER: 140 | new_lines_after.append(node[0].end_point) 141 | if last_sent is not None: 142 | for nl_node in TextNode.get_new_lines(2, last_sent.end_point): 143 | yield nl_node 144 | last_sent = nl_node 145 | 146 | yield from TextNode.get_new_lines( 147 | 1, 148 | last_sent.end_point if last_sent else (0, 0) 149 | ) 150 | 151 | def _needs_space_before(self, node, lines, last_sent) -> bool: 152 | if last_sent is None or last_sent.text[-1] == '\n': 153 | return False 154 | if node.start_point[0] == last_sent.end_point[0]: 155 | return ' ' in lines[node.start_point[0]][last_sent.end_point[1]:node.start_point[1]] 156 | return last_sent.text != '\n' 157 | -------------------------------------------------------------------------------- /textLSP/documents/markdown/__init__.py: -------------------------------------------------------------------------------- 1 | from .markdown import MarkDownDocument 2 | -------------------------------------------------------------------------------- /textLSP/documents/markdown/markdown.py: -------------------------------------------------------------------------------- 1 | from typing import Generator 2 | from tree_sitter import Tree 3 | 4 | from ..document import TreeSitterDocument, TextNode 5 | 6 | 7 | class MarkDownDocument(TreeSitterDocument): 8 | SUBFOLDER_MARKDOWN = 'tree-sitter-markdown' 9 | SUBFOLDER_MARKDOWN_INLINE = 'tree-sitter-markdown-inline' 10 | 11 | TEXT = 'text' 12 | PARAGRAPH = 'paragraph' 13 | HEADING_CONTENT = 'heading_content' 14 | ATX_HEADING = 'atx_heading' 15 | LINK_TEXT = 'link_text' 16 | LINK_LABEL = 'link_label' 17 | LINK_TITLE = 'link_title' 18 | EMPHASIS = 'emphasis' 19 | STRONG_EMPHASIS = 'strong_emphasis' 20 | STRIKETHROUGH = 'strikethrough' 21 | IMAGE_DESCRIPTION = 'image_description' 22 | TABLE_CELL = 'table_cell' 23 | 24 | NODE_CONTENT = 'content' 25 | NODE_NEWLINE_AFTER_ONE = 'newline_after_one' 26 | NODE_NEWLINE_AFTER_TWO = 'newline_after_two' 27 | 28 | ROOTS_WITH_TEXT = { 29 | PARAGRAPH, 30 | HEADING_CONTENT, 31 | LINK_TEXT, 32 | LINK_LABEL, 33 | LINK_TITLE, 34 | EMPHASIS, 35 | STRONG_EMPHASIS, 36 | STRIKETHROUGH, 37 | IMAGE_DESCRIPTION, 38 | TABLE_CELL, 39 | } 40 | 41 | NEWLINE_AFTER_ONE = { 42 | } 43 | 44 | NEWLINE_AFTER_TWO = { 45 | PARAGRAPH, 46 | ATX_HEADING, 47 | TABLE_CELL, 48 | } 49 | 50 | def __init__(self, *args, **kwargs): 51 | super().__init__( 52 | 'markdown', 53 | 'https://github.com/ikatyang/tree-sitter-markdown', 54 | 'v0.7.1', 55 | *args, 56 | **kwargs, 57 | ) 58 | 59 | def _build_query(self): 60 | query_str = '' 61 | 62 | for root in self.ROOTS_WITH_TEXT: 63 | query_str += f'({root} ({self.TEXT}) @{self.NODE_CONTENT})\n' 64 | 65 | for root in self.NEWLINE_AFTER_ONE: 66 | query_str += f'({root}) @{self.NODE_NEWLINE_AFTER_ONE}\n' 67 | 68 | for root in self.NEWLINE_AFTER_TWO: 69 | query_str += f'({root}) @{self.NODE_NEWLINE_AFTER_TWO}\n' 70 | 71 | return self._ts_language.query(query_str) 72 | 73 | def _iterate_text_nodes( 74 | self, 75 | 
tree: Tree, 76 | start_point, 77 | end_point, 78 | ) -> Generator[TextNode, None, None]: 79 | lines = tree.text.decode('utf-8').split('\n') 80 | 81 | last_sent = None 82 | new_lines_after = list() 83 | 84 | if start_point == end_point: 85 | # FIXME This is a weird issue, it seems that in some cases nothing 86 | # is selected if the interval is empty, but not in all cases. See 87 | # markdown_test.py test_edits() where the first two characters of 88 | # '# Header' are removed 89 | end_point = ( 90 | end_point[0], 91 | end_point[1] + 1 92 | ) 93 | 94 | for node in self._query.captures(tree.root_node, start_point=start_point, end_point=end_point): 95 | # Check if we need some newlines after previous elements 96 | while len(new_lines_after) > 0: 97 | if node[0].start_point > new_lines_after[0]: 98 | if last_sent is not None: 99 | for nl in TextNode.get_new_lines(1, last_sent.end_point): 100 | last_sent = nl 101 | yield nl 102 | new_lines_after.pop(0) 103 | else: 104 | break 105 | 106 | if node[1] == self.NODE_CONTENT: 107 | if len(node[0].text.decode('utf-8').strip()) == 0: 108 | continue 109 | # handle spaces 110 | if self._needs_space_before(node[0], lines, last_sent): 111 | sp = node[0].start_point 112 | if sp[1] > 0: 113 | yield TextNode.space( 114 | start_point=(sp[0], sp[1]-1), 115 | end_point=(sp[0], sp[1]-1), 116 | ) 117 | else: 118 | yield TextNode.space( 119 | start_point=( 120 | last_sent.end_point[0], 121 | last_sent.end_point[1]+1 122 | ), 123 | end_point=( 124 | last_sent.end_point[0], 125 | last_sent.end_point[1]+1 126 | ), 127 | ) 128 | 129 | for nl in self._handle_text_nodes(node[0]): 130 | last_sent = nl 131 | yield nl 132 | 133 | elif node[1] == self.NODE_NEWLINE_AFTER_ONE: 134 | self._insert_point_in_order(node[0].end_point, new_lines_after) 135 | elif node[1] == self.NODE_NEWLINE_AFTER_TWO: 136 | self._insert_point_in_order(node[0].end_point, new_lines_after, 2) 137 | 138 | yield from TextNode.get_new_lines( 139 | 1, 140 | last_sent.end_point if last_sent else (0, 0) 141 | ) 142 | 143 | def _handle_text_nodes(self, inline_node) -> Generator[TextNode, None, None]: 144 | line_offset = 0 145 | text = inline_node.text.decode('utf-8').strip() 146 | last_sent = None 147 | 148 | for token in text.split(): 149 | if last_sent is not None: 150 | yield TextNode( 151 | text=' ', 152 | start_point=( 153 | inline_node.start_point[0], 154 | inline_node.start_point[1]+line_offset 155 | ), 156 | end_point=( 157 | inline_node.start_point[0], 158 | inline_node.start_point[1]+line_offset 159 | ), 160 | ) 161 | line_offset += 1 162 | 163 | token_len = len(token) 164 | node = TextNode( 165 | text=token, 166 | start_point=( 167 | inline_node.start_point[0], 168 | inline_node.start_point[1]+line_offset 169 | ), 170 | end_point=( 171 | inline_node.start_point[0], 172 | inline_node.start_point[1]+line_offset+token_len-1 173 | ), 174 | ) 175 | yield node 176 | 177 | last_sent = node 178 | line_offset += token_len 179 | 180 | @staticmethod 181 | def _insert_point_in_order(point, lst, times=1): 182 | i = 0 183 | length = len(lst) 184 | while i < length and lst[i] < point: 185 | i += 1 186 | 187 | for _ in range(times): 188 | lst.insert(i, point) 189 | 190 | def _needs_space_before(self, node, lines, last_sent) -> bool: 191 | if last_sent is None or last_sent.text[-1] == '\n': 192 | return False 193 | if node.start_point[0] == last_sent.end_point[0]: 194 | text = node.text.decode('utf-8') 195 | # text nodes contain whitespaces which can lead to errors 196 | # E.g.: |~~This~~| is a text| 197 | diff = 
len(text) - len(text.lstrip()) 198 | return ' ' in lines[node.start_point[0]][last_sent.end_point[1]:node.start_point[1]+diff] 199 | return last_sent.text != '\n' 200 | -------------------------------------------------------------------------------- /textLSP/documents/org/__init__.py: -------------------------------------------------------------------------------- 1 | from .org import OrgDocument 2 | -------------------------------------------------------------------------------- /textLSP/documents/org/org.py: -------------------------------------------------------------------------------- 1 | from typing import Generator 2 | from tree_sitter import Tree, Node 3 | 4 | from ..document import TreeSitterDocument, TextNode 5 | 6 | 7 | class OrgDocument(TreeSitterDocument): 8 | CONFIGURATION_TODO_KEYWORDS = 'org_todo_keywords' 9 | 10 | DEFAULT_TODO_KEYWORDS = {'TODO', 'DONE'} 11 | 12 | EXPR = 'expr' 13 | HEADLINE = 'headline' 14 | PARAGRAPH = 'paragraph' 15 | SECTION = 'section' 16 | PARAGRAPH = 'paragraph' 17 | ITEM = 'item' 18 | 19 | NODE_CONTENT = 'content' 20 | NODE_NEWLINE_AFTER_ONE = 'newline_after_one' 21 | NODE_NEWLINE_AFTER_TWO = 'newline_after_two' 22 | 23 | TEXT_ROOTS = { 24 | PARAGRAPH, 25 | } 26 | 27 | TEXT_ROOTS_WITH_ITEM = { 28 | HEADLINE, 29 | } 30 | 31 | NEWLINE_AFTER_ONE = { 32 | # SECTION, 33 | } 34 | 35 | NEWLINE_AFTER_TWO = { 36 | HEADLINE, 37 | PARAGRAPH, 38 | } 39 | 40 | def __init__(self, *args, **kwargs): 41 | super().__init__( 42 | 'org', 43 | 'https://github.com/milisims/tree-sitter-org', 44 | 'v1.3.1', 45 | *args, 46 | **kwargs, 47 | ) 48 | keywords = self.config.setdefault( 49 | self.CONFIGURATION_TODO_KEYWORDS, 50 | self.DEFAULT_TODO_KEYWORDS, 51 | ) 52 | if type(keywords) != set: 53 | self.config[self.CONFIGURATION_TODO_KEYWORDS] = set(keywords) 54 | 55 | def _build_query(self): 56 | query_str = '' 57 | 58 | for root in self.TEXT_ROOTS: 59 | query_str += f'({root} ({self.EXPR}) @{self.NODE_CONTENT})\n' 60 | 61 | for root in self.TEXT_ROOTS_WITH_ITEM: 62 | query_str += f'({root} (item ({self.EXPR}) @{self.NODE_CONTENT}))\n' 63 | 64 | for root in self.NEWLINE_AFTER_ONE: 65 | query_str += f'({root}) @{self.NODE_NEWLINE_AFTER_ONE}\n' 66 | 67 | for root in self.NEWLINE_AFTER_TWO: 68 | query_str += f'({root}) @{self.NODE_NEWLINE_AFTER_TWO}\n' 69 | 70 | return self._ts_language.query(query_str) 71 | 72 | def _iterate_text_nodes( 73 | self, 74 | tree: Tree, 75 | start_point, 76 | end_point, 77 | ) -> Generator[TextNode, None, None]: 78 | lines = tree.text.decode('utf-8').split('\n') 79 | 80 | last_sent = None 81 | new_lines_after = list() 82 | 83 | for node in self._query.captures(tree.root_node, start_point=start_point, end_point=end_point): 84 | # Check if we need some newlines after previous elements 85 | while len(new_lines_after) > 0: 86 | if node[0].start_point > new_lines_after[0]: 87 | if last_sent is not None: 88 | for nl in TextNode.get_new_lines(1, last_sent.end_point): 89 | last_sent = nl 90 | yield nl 91 | new_lines_after.pop(0) 92 | else: 93 | break 94 | 95 | if node[1] == self.NODE_CONTENT: 96 | # check if we need newlines due to linebreaks in source 97 | # if ( 98 | # last_sent is not None 99 | # and node[0].start_point[0] - last_sent.end_point[0] > 1 100 | # and '' in lines[last_sent.end_point[0]+1:node[0].start_point[0]] 101 | # ): 102 | # for nl_node in TextNode.get_new_lines(1, last_sent.end_point): 103 | # yield nl_node 104 | # last_sent = nl_node 105 | 106 | # handle spaces 107 | if self._needs_space_before(node[0], lines, last_sent): 108 | sp = 
node[0].start_point 109 | if sp[1] > 0: 110 | yield TextNode.space( 111 | start_point=(sp[0], sp[1]-1), 112 | end_point=(sp[0], sp[1]-1), 113 | ) 114 | else: 115 | yield TextNode.space( 116 | start_point=( 117 | last_sent.end_point[0], 118 | last_sent.end_point[1]+1 119 | ), 120 | end_point=( 121 | last_sent.end_point[0], 122 | last_sent.end_point[1]+1 123 | ), 124 | ) 125 | 126 | if self._valid_content_node(node[0]): 127 | last_sent = TextNode.from_ts_node(node[0]) 128 | yield last_sent 129 | elif node[1] == self.NODE_NEWLINE_AFTER_ONE: 130 | self._insert_point_in_order(node[0].end_point, new_lines_after) 131 | elif node[1] == self.NODE_NEWLINE_AFTER_TWO: 132 | self._insert_point_in_order(node[0].end_point, new_lines_after, 2) 133 | 134 | yield from TextNode.get_new_lines( 135 | 1, 136 | last_sent.end_point if last_sent else (0, 0) 137 | ) 138 | 139 | def _valid_content_node(self, node: Node): 140 | return not ( 141 | node.parent is not None 142 | and node.parent.parent is not None 143 | and node.parent.parent.type == self.HEADLINE 144 | and node.text.decode('utf-8') in self.config[self.CONFIGURATION_TODO_KEYWORDS] 145 | and self.lines[node.start_point[0]][:node.start_point[1]] == '*' * max(1, node.start_point[1]-1) + ' ' 146 | ) 147 | 148 | @staticmethod 149 | def _insert_point_in_order(point, lst, times=1): 150 | i = 0 151 | length = len(lst) 152 | while i < length and lst[i] < point: 153 | i += 1 154 | 155 | for _ in range(times): 156 | lst.insert(i, point) 157 | 158 | def _needs_space_before(self, node, lines, last_sent) -> bool: 159 | if last_sent is None or last_sent.text[-1] == '\n': 160 | return False 161 | if node.start_point[0] == last_sent.end_point[0]: 162 | return ' ' in lines[node.start_point[0]][last_sent.end_point[1]:node.start_point[1]] 163 | return last_sent.text != '\n' 164 | -------------------------------------------------------------------------------- /textLSP/documents/txt/__init__.py: -------------------------------------------------------------------------------- 1 | from .txt import TxtDocument 2 | -------------------------------------------------------------------------------- /textLSP/documents/txt/txt.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from lsprotocol.types import Position, Range 4 | 5 | from ..document import CleanableDocument 6 | from ...types import Interval 7 | 8 | 9 | class TxtDocument(CleanableDocument): 10 | CONFIGURATION_PARSE = 'parse' 11 | 12 | DEFAULT_PARSE = True 13 | 14 | PATTERN_BREAK_INLINE = re.compile('([^\n])\n([^\n])') 15 | 16 | def _clean_source(self): 17 | parse = self.config.setdefault( 18 | self.CONFIGURATION_PARSE, 19 | self.DEFAULT_PARSE, 20 | ) 21 | if parse: 22 | self._cleaned_source = self.PATTERN_BREAK_INLINE.sub(r'\1 \2', self.source) 23 | else: 24 | self._cleaned_source = self.source 25 | 26 | def position_at_offset(self, offset: int, cleaned=False) -> Position: 27 | return super().position_at_offset(offset, False) 28 | 29 | def range_at_offset(self, offset: int, length: int, cleaned=False) -> Range: 30 | return super().range_at_offset(offset, length, False) 31 | 32 | def offset_at_position(self, position: Position, cleaned=False) -> int: 33 | return super().offset_at_position(position, False) 34 | 35 | def paragraphs_at_range(self, position_range: Range, cleaned=False) -> Interval: 36 | return super().paragraphs_at_range(position_range, False) 37 | 38 | def last_position(self, cleaned=False): 39 | return super().last_position(False) 40 | 
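A minimal usage sketch (not part of the repository) of the inline line-break cleaning implemented by TxtDocument._clean_source above: a single line break within a paragraph is replaced by a space, while the blank line separating paragraphs is kept.

import re

# Same pattern as TxtDocument.PATTERN_BREAK_INLINE above: a non-newline
# character, a single newline, then another non-newline character.
pattern = re.compile('([^\n])\n([^\n])')

source = 'First line\nof the same paragraph.\n\nA new paragraph.'
cleaned = pattern.sub(r'\1 \2', source)
assert cleaned == 'First line of the same paragraph.\n\nA new paragraph.'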
-------------------------------------------------------------------------------- /textLSP/nn_utils.py: -------------------------------------------------------------------------------- 1 | from textLSP.types import ConfigurationError 2 | 3 | try: 4 | import torch 5 | except ModuleNotFoundError: 6 | raise ConfigurationError( 7 | "Dependencies are missing for torch-based modules. Please look at textLSP's" 8 | " documentation for installing additional dependencies." 9 | ) 10 | 11 | def get_device(use_gpu: bool): 12 | if isinstance(use_gpu, str): 13 | return use_gpu 14 | 15 | if use_gpu: 16 | if torch.cuda.is_available(): 17 | return 'cuda' 18 | 19 | if torch.backends.mps.is_available(): 20 | return 'mps' 21 | 22 | return 'cpu' 23 | 24 | 25 | def set_quantization_args(bits, device, model_kwargs): 26 | if bits not in {4, 8, 16, 32}: 27 | raise ConfigurationError(f'Invalid quantization value: {bits}.' 28 | ' Supported: 4, 8, 16, 32.') 29 | 30 | if bits == 16: 31 | model_kwargs['torch_dtype'] = torch.bfloat16 32 | elif bits == 4 or bits == 8: 33 | if device != 'cuda': 34 | raise ConfigurationError(f'{bits}bit quantization needs CUDA GPU.') 35 | else: 36 | model_kwargs[f'load_in_{bits}bit'] = True 37 | 38 | return model_kwargs 39 | -------------------------------------------------------------------------------- /textLSP/server.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import List, Optional 4 | from pygls.server import LanguageServer 5 | from pygls.protocol import LanguageServerProtocol, lsp_method 6 | from pygls.workspace import Document 7 | from lsprotocol.types import ( 8 | TEXT_DOCUMENT_DID_OPEN, 9 | TEXT_DOCUMENT_DID_CHANGE, 10 | TEXT_DOCUMENT_DID_CLOSE, 11 | TEXT_DOCUMENT_DID_SAVE, 12 | TEXT_DOCUMENT_CODE_ACTION, 13 | WORKSPACE_DID_CHANGE_CONFIGURATION, 14 | INITIALIZE, 15 | TEXT_DOCUMENT_COMPLETION, 16 | SHUTDOWN, 17 | ) 18 | from lsprotocol.types import ( 19 | DidOpenTextDocumentParams, 20 | DidChangeTextDocumentParams, 21 | DidSaveTextDocumentParams, 22 | DidChangeConfigurationParams, 23 | DidCloseTextDocumentParams, 24 | InitializeParams, 25 | InitializeResult, 26 | CodeActionParams, 27 | CodeActionKind, 28 | CodeActionOptions, 29 | CodeAction, 30 | CompletionList, 31 | CompletionOptions, 32 | CompletionParams, 33 | ShutdownRequest, 34 | ) 35 | from .workspace import TextLSPWorkspace 36 | from .utils import merge_dicts, get_textlsp_version 37 | from .analysers.handler import AnalyserHandler 38 | 39 | 40 | logger = logging.getLogger(__name__) 41 | 42 | 43 | class TextLSPLanguageServerProtocol(LanguageServerProtocol): 44 | def __init__(self, *args, **kwargs): 45 | super().__init__(*args, **kwargs) 46 | 47 | @lsp_method(INITIALIZE) 48 | def lsp_initialize(self, params: InitializeParams) -> InitializeResult: 49 | result = super().lsp_initialize(params) 50 | self._workspace = TextLSPWorkspace.workspace2textlspworkspace( 51 | self.workspace, 52 | self._server.analyser_handler, 53 | self._server.settings, 54 | ) 55 | self._server.update_settings(params.initialization_options) 56 | return result 57 | 58 | 59 | class TextLSPLanguageServer(LanguageServer): 60 | # TODO make a config class for easier settings handling and option for 61 | # settings keys such as textLSP.check_text.on_edit 62 | CONFIGURATION_SECTION = 'textLSP' 63 | CONFIGURATION_ANALYSERS = 'analysers' 64 | CONFIGURATION_DOCUMENTS = 'documents' 65 | 66 | COMMAND_ANALYSE = 'analyse' 67 | COMMAND_CUSTOM = 'custom_command' 68 | 69 | def __init__(self, *args, 
**kwargs): 70 | super().__init__(*args, **kwargs) 71 | self.settings = dict() 72 | self.init_settings() 73 | self.analyser_handler = AnalyserHandler(self) 74 | logger.warning('TextLSP initialized!') 75 | 76 | def init_settings(self): 77 | self.settings.setdefault(self.CONFIGURATION_SECTION, dict()) 78 | self.settings[self.CONFIGURATION_SECTION].setdefault(self.CONFIGURATION_ANALYSERS, None) 79 | self.settings[self.CONFIGURATION_SECTION].setdefault(self.CONFIGURATION_DOCUMENTS, None) 80 | 81 | def get_analyser_settings(self, settings=None): 82 | if settings is None: 83 | settings = self.settings 84 | 85 | if ( 86 | self.CONFIGURATION_SECTION in settings and 87 | self.CONFIGURATION_ANALYSERS in settings[self.CONFIGURATION_SECTION] 88 | ): 89 | return self.settings[self.CONFIGURATION_SECTION][self.CONFIGURATION_ANALYSERS] 90 | return None 91 | 92 | def get_document_settings(self, settings=None): 93 | if settings is None: 94 | settings = self.settings 95 | 96 | if ( 97 | self.CONFIGURATION_SECTION in settings and 98 | self.CONFIGURATION_DOCUMENTS in settings[self.CONFIGURATION_SECTION] 99 | ): 100 | return self.settings[self.CONFIGURATION_SECTION][self.CONFIGURATION_DOCUMENTS] 101 | return None 102 | 103 | def update_settings(self, settings): 104 | if settings is None or len(settings) == 0: 105 | return 106 | 107 | self.settings = merge_dicts(self.settings, settings) 108 | if self.get_analyser_settings(settings): 109 | # update only if there was any update related to it 110 | self.analyser_handler.update_settings( 111 | self.get_analyser_settings() 112 | ) 113 | if self.get_document_settings(settings): 114 | # update only if there was any update related to it 115 | self.lsp.workspace.update_settings( 116 | self.get_document_settings() 117 | ) 118 | 119 | def publish_stored_diagnostics(self, doc: Document): 120 | diagnostics = list() 121 | for lst in self.analyser_handler.get_diagnostics(doc): 122 | diagnostics.extend(lst) 123 | self.publish_diagnostics(doc.uri, diagnostics) 124 | 125 | def shutdown(self): 126 | logger.warning('TextLSP shutting down!') 127 | self.analyser_handler.shutdown() 128 | super().shutdown() 129 | 130 | 131 | SERVER = TextLSPLanguageServer( 132 | name='textLSP', 133 | version=get_textlsp_version(), 134 | protocol_cls=TextLSPLanguageServerProtocol, 135 | ) 136 | 137 | 138 | @SERVER.feature(TEXT_DOCUMENT_DID_OPEN) 139 | async def did_open(ls: TextLSPLanguageServer, params: DidOpenTextDocumentParams): 140 | await ls.analyser_handler.did_open(params) 141 | 142 | 143 | @SERVER.feature(TEXT_DOCUMENT_DID_CHANGE) 144 | async def did_change(ls: TextLSPLanguageServer, params: DidChangeTextDocumentParams): 145 | await ls.analyser_handler.did_change(params) 146 | 147 | 148 | @SERVER.feature(TEXT_DOCUMENT_DID_SAVE) 149 | async def did_save(ls: TextLSPLanguageServer, params: DidSaveTextDocumentParams): 150 | await ls.analyser_handler.did_save(params) 151 | 152 | 153 | @SERVER.feature(TEXT_DOCUMENT_DID_CLOSE) 154 | async def did_close(ls: TextLSPLanguageServer, params: DidCloseTextDocumentParams): 155 | await ls.analyser_handler.did_close(params) 156 | 157 | 158 | @SERVER.feature(SHUTDOWN) 159 | def shutdown(ls: TextLSPLanguageServer, params: ShutdownRequest): 160 | ls.shutdown() 161 | 162 | 163 | @SERVER.feature(WORKSPACE_DID_CHANGE_CONFIGURATION) 164 | def did_change_configuration(ls: TextLSPLanguageServer, params: DidChangeConfigurationParams): 165 | ls.update_settings(params.settings) 166 | 167 | 168 | @SERVER.feature( 169 | TEXT_DOCUMENT_CODE_ACTION, 170 | CodeActionOptions( 171 | 
code_action_kinds=[ 172 | CodeActionKind.QuickFix, 173 | ], 174 | ), 175 | ) 176 | def code_action( 177 | ls: TextLSPLanguageServer, 178 | params: CodeActionParams 179 | ) -> Optional[List[CodeAction]]: 180 | return ls.analyser_handler.get_code_actions(params) 181 | 182 | 183 | @SERVER.command(TextLSPLanguageServer.COMMAND_ANALYSE) 184 | async def command_analyse(ls: TextLSPLanguageServer, *args): 185 | await ls.analyser_handler.command_analyse(*args) 186 | 187 | 188 | @SERVER.command(TextLSPLanguageServer.COMMAND_CUSTOM) 189 | async def command_custom_command(ls: TextLSPLanguageServer, *args): 190 | await ls.analyser_handler.command_custom_command(*args) 191 | 192 | 193 | @SERVER.feature( 194 | TEXT_DOCUMENT_COMPLETION, 195 | CompletionOptions(trigger_characters=[' ']) 196 | ) 197 | def completions( 198 | ls: TextLSPLanguageServer, 199 | params: Optional[CompletionParams] = None 200 | ) -> CompletionList: 201 | return ls.analyser_handler.get_completions(params) 202 | -------------------------------------------------------------------------------- /textLSP/types.py: -------------------------------------------------------------------------------- 1 | import re 2 | import bisect 3 | import enum 4 | import difflib 5 | import uuid 6 | 7 | from typing import Optional, Any, List 8 | from dataclasses import dataclass 9 | from sortedcontainers import SortedDict 10 | 11 | from lsprotocol.types import ( 12 | Position, 13 | Range, 14 | CodeActionKind, 15 | WorkDoneProgressBegin, 16 | WorkDoneProgressReport, 17 | WorkDoneProgressEnd, 18 | ) 19 | 20 | from .utils import position_to_tuple 21 | 22 | 23 | TEXT_PASSAGE_PATTERN = re.compile('[.?!] |\\n') 24 | LINE_PATTERN = re.compile('\\n') 25 | 26 | 27 | class ConfigurationError(Exception): 28 | pass 29 | 30 | 31 | @dataclass 32 | class Interval(): 33 | start: int 34 | length: int 35 | 36 | def __eq__(self, o: object): 37 | if not isinstance(o, Interval): 38 | return NotImplemented 39 | return self.start == o.start and self.length == o.length 40 | 41 | def __hash__(self): 42 | return hash((self.start, self.length)) 43 | 44 | def __gt__(self, o: object): 45 | if not isinstance(o, Interval): 46 | return NotImplemented 47 | return self.start > o.start 48 | 49 | 50 | @dataclass 51 | class OffsetPositionInterval(): 52 | offset_interval: Interval 53 | position_range: Range 54 | value: Optional[Any] = None 55 | 56 | 57 | class OffsetPositionIntervalList(): 58 | 59 | def __init__(self): 60 | self._offset_start = list() 61 | self._offset_end = list() 62 | self._position_start_line = list() 63 | self._position_start_character = list() 64 | self._position_end_line = list() 65 | self._position_end_character = list() 66 | self._value = list() 67 | 68 | def add_interval_values( 69 | self, 70 | offset_start: int, 71 | offset_end: int, 72 | position_start_line: int, 73 | position_start_character: int, 74 | position_end_line: int, 75 | position_end_character: int, 76 | value: Any 77 | ): 78 | self._offset_start.append(offset_start) 79 | self._offset_end.append(offset_end) 80 | self._position_start_line.append(position_start_line) 81 | self._position_start_character.append(position_start_character) 82 | self._position_end_line.append(position_end_line) 83 | self._position_end_character.append(position_end_character) 84 | self._value.append(value) 85 | 86 | def add_interval(self, interval: OffsetPositionInterval): 87 | self.add_interval_values( 88 | interval.offset_interval.start, 89 | interval.offset_interval.start + interval.offset_interval.length - 1, 90 | 
interval.position_range.start.line, 91 | interval.position_range.start.character, 92 | interval.position_range.end.line, 93 | interval.position_range.end.character, 94 | interval.value, 95 | ) 96 | 97 | def get_interval(self, idx: int) -> OffsetPositionInterval: 98 | return OffsetPositionInterval( 99 | offset_interval=Interval( 100 | start=self._offset_start[idx], 101 | length=self._offset_end[idx]-self._offset_start[idx]+1, 102 | ), 103 | position_range=Range( 104 | start=Position( 105 | line=self._position_start_line[idx], 106 | character=self._position_start_character[idx], 107 | ), 108 | end=Position( 109 | line=self._position_end_line[idx], 110 | character=self._position_end_character[idx], 111 | ), 112 | ), 113 | value=self._value[idx] 114 | ) 115 | 116 | def __len__(self): 117 | return len(self._offset_start) 118 | 119 | @property 120 | def values(self): 121 | return self._value 122 | 123 | def sort(self): 124 | indices = [ 125 | item[0] 126 | for item in sorted( 127 | enumerate(self._offset_start), 128 | key=lambda x:x[1] 129 | ) 130 | ] 131 | self._offset_start = [ 132 | self._offset_start[idx] 133 | for idx in indices 134 | ] 135 | self._offset_end = [ 136 | self._offset_end[idx] 137 | for idx in indices 138 | ] 139 | self._position_start_line = [ 140 | self._position_start_line[idx] 141 | for idx in indices 142 | ] 143 | self._position_start_character = [ 144 | self._position_start_character[idx] 145 | for idx in indices 146 | ] 147 | self._position_end_line = [ 148 | self._position_end_line[idx] 149 | for idx in indices 150 | ] 151 | self._position_end_character = [ 152 | self._position_end_character[idx] 153 | for idx in indices 154 | ] 155 | 156 | def get_idx_at_offset(self, offset: int) -> int: 157 | min_lst = self._offset_start 158 | max_lst = self._offset_end 159 | 160 | idx = bisect.bisect_left(max_lst, offset) 161 | if idx < len(max_lst) and min_lst[idx] <= offset <= max_lst[idx]: 162 | return idx 163 | 164 | return None 165 | 166 | def get_interval_at_offset(self, offset: int) -> OffsetPositionInterval: 167 | idx = self.get_idx_at_offset(offset) 168 | if idx is None: 169 | return None 170 | return self.get_interval(idx) 171 | 172 | def get_idx_at_position(self, position: Position, strict=True) -> int: 173 | """ 174 | :param strict: If False, return the idx of the next (or last) interval if does not exist 175 | """ 176 | idx = bisect.bisect_left(self._position_end_line, position.line) 177 | length = len(self) 178 | 179 | if idx == length: 180 | return None if strict else length-1 181 | if position.line < self._position_start_line[idx]: 182 | return None if strict else idx 183 | if position.line > self._position_end_line[idx]: 184 | return None if strict else length-1 185 | 186 | lst = list() 187 | i = idx 188 | while self._position_end_line[i] == self._position_end_line[idx]: 189 | lst.append(self._position_end_character[i]) 190 | i += 1 191 | if i >= length: 192 | break 193 | 194 | idx2 = bisect.bisect_left(lst, position.character) 195 | idx += idx2 196 | 197 | if idx == length: 198 | return None if strict else length-1 199 | 200 | if self._position_start_character[idx] <= position.character <= self._position_end_character[idx]: 201 | return idx 202 | if ( 203 | position.line < self._position_start_line[idx] or 204 | position.character < self._position_start_character[idx] 205 | ): 206 | return None if strict else idx 207 | 208 | return None if strict else min(idx+1, length-1) 209 | 210 | def get_interval_at_position(self, position: Position, strict=True) -> 
OffsetPositionInterval: 211 | """ 212 | :param strict: If False, return the object of the next (or last) interval if does not exist 213 | """ 214 | idx = self.get_idx_at_position(position, strict) 215 | if idx is None: 216 | return None 217 | return self.get_interval(idx) 218 | 219 | 220 | class PositionDict(): 221 | 222 | def __init__(self): 223 | self._positions = SortedDict() 224 | 225 | def add(self, position: Position, item): 226 | position = position_to_tuple(position) 227 | self._positions[position] = item 228 | 229 | def get(self, position: Position): 230 | position = position_to_tuple(position) 231 | return self._positions[position] 232 | 233 | def pop(self, position: Position): 234 | position = position_to_tuple(position) 235 | return self._positions.pop(position) 236 | 237 | def update(self, old_position: Position, new_position: Position = None, 238 | new_value=None): 239 | assert new_position is not None or new_value is not None, ( 240 | 'new_position or new_value should be specified.') 241 | 242 | old_position = position_to_tuple(old_position) 243 | if new_position is None: 244 | self._positions[old_position] = new_value 245 | return 246 | 247 | new_position = position_to_tuple(new_position) 248 | if new_value is None: 249 | new_value = self._positions.pop(old_position) 250 | else: 251 | del self._positions[old_position] 252 | 253 | self._positions[new_position] = new_value 254 | 255 | def remove(self, position: Position): 256 | position = position_to_tuple(position) 257 | del self._positions[position] 258 | 259 | def remove_from(self, position: Position, inclusive=True): 260 | position = position_to_tuple(position) 261 | num = 0 262 | for key in list(self._positions.irange( 263 | minimum=position, 264 | inclusive=(inclusive, False) 265 | )): 266 | del self._positions[key] 267 | num += 1 268 | 269 | return num 270 | 271 | def remove_between(self, range: Range, inclusive=(True, True)): 272 | minimum = position_to_tuple(range.start) 273 | maximum = position_to_tuple(range.end) 274 | num = 0 275 | for key in list(self._positions.irange( 276 | minimum=minimum, 277 | maximum=maximum, 278 | inclusive=inclusive, 279 | )): 280 | del self._positions[key] 281 | num += 1 282 | 283 | return num 284 | 285 | def irange(self, minimum: Position = None, maximum: Position = None, *args, 286 | **kwargs): 287 | if minimum is not None: 288 | minimum = position_to_tuple(minimum) 289 | if maximum is not None: 290 | maximum = position_to_tuple(maximum) 291 | 292 | return self._positions.irange(minimum, maximum, *args, **kwargs) 293 | 294 | def irange_values(self, *args, **kwargs): 295 | for key in self.irange(*args, **kwargs): 296 | yield self._positions[key] 297 | 298 | def __iter__(self): 299 | return iter(self._positions.values()) 300 | 301 | 302 | @enum.unique 303 | class TextLSPCodeActionKind(str, enum.Enum): 304 | AcceptSuggestion = CodeActionKind.QuickFix + '.accept_suggestion' 305 | Command = 'command' 306 | 307 | 308 | @dataclass 309 | class TokenDiff(): 310 | INSERT = 'insert' 311 | DELETE = 'delete' 312 | REPLACE = 'replace' 313 | 314 | type: str 315 | old_token: str 316 | new_token: str 317 | offset: int 318 | length: int 319 | 320 | @staticmethod 321 | def _split(text): 322 | return [item for item in re.split(r"(\s)", text) if item != ""] 323 | 324 | @staticmethod 325 | def token_level_diff(text1, text2) -> List: 326 | tokens1 = TokenDiff._split(text1) 327 | tokens2 = TokenDiff._split(text2) 328 | diff = difflib.SequenceMatcher(None, tokens1, tokens2) 329 | 330 | return [ 331 | 
TokenDiff( 332 | type=item[0], 333 | old_token=''.join(tokens1[item[1]:item[2]]), 334 | new_token=''.join(tokens2[item[3]:item[4]]), 335 | offset=0 if item[1] == 0 else len(''.join(tokens1[:item[1]])), 336 | length=len(''.join(tokens1[item[1]:item[2]])), 337 | ) 338 | for item in diff.get_opcodes() 339 | if item[0] != 'equal' 340 | ] 341 | 342 | def __str__(self): 343 | return ( 344 | f'{self.type}: {self.old_token} -> {self.new_token} ' 345 | f'({self.offset}, {self.length})' 346 | ) 347 | 348 | 349 | class ProgressBar(): 350 | def __init__(self, ls, title='', percentage=0, token=None): 351 | self.ls = ls 352 | self.title = title 353 | self.percentage = percentage 354 | self.token = token 355 | if self.token is None: 356 | self.token = self.create_token() 357 | 358 | def begin(self, title=None, percentage=None): 359 | if title is not None: 360 | self.title = title 361 | if percentage is not None: 362 | self.percentage = percentage 363 | 364 | if self.token not in self.ls.progress.tokens: 365 | self.ls.progress.create(self.token) 366 | self.ls.progress.begin( 367 | self.token, 368 | WorkDoneProgressBegin( 369 | title=self.title, 370 | percentage=self.percentage, 371 | ) 372 | ) 373 | 374 | def update(self, message, percentage=0): 375 | self.ls.progress.report( 376 | self.token, 377 | WorkDoneProgressReport( 378 | message=message, 379 | percentage=percentage 380 | ), 381 | ) 382 | 383 | def end(self, message): 384 | self.ls.progress.end( 385 | self.token, 386 | WorkDoneProgressEnd(message=message) 387 | ) 388 | 389 | def __enter__(self): 390 | self.begin() 391 | return self 392 | 393 | def __exit__(self, type, value, traceback): 394 | self.end('Done') 395 | 396 | @staticmethod 397 | def create_token(): 398 | return str(uuid.uuid4()) 399 | -------------------------------------------------------------------------------- /textLSP/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import importlib 3 | import inspect 4 | import re 5 | 6 | from importlib.metadata import version 7 | from functools import wraps 8 | from threading import RLock 9 | from git import Repo 10 | from appdirs import user_cache_dir 11 | from lsprotocol.types import Position 12 | 13 | 14 | def merge_dicts(dict1, dict2): 15 | for key in dict2: 16 | if key in dict1 and isinstance(dict1[key], dict) and isinstance(dict2[key], dict): 17 | merge_dicts(dict1[key], dict2[key]) 18 | else: 19 | dict1[key] = dict2[key] 20 | return dict1 21 | 22 | 23 | def get_class(name, cls_type, return_multi=False): 24 | try: 25 | module = importlib.import_module(name) 26 | except ModuleNotFoundError: 27 | raise ModuleNotFoundError( 28 | f'Unsupported module: {name}', 29 | ) 30 | 31 | cls_lst = list() 32 | for cls_name, obj in inspect.getmembers( 33 | sys.modules[module.__name__], 34 | inspect.isclass 35 | ): 36 | if obj != cls_type and issubclass(obj, cls_type): 37 | if not return_multi and len(cls_lst) > 0: 38 | raise ImportError( 39 | f'There are multiple implementations of {name}. This is an' 40 | ' implementation error. Please report this issue!' 41 | ) 42 | cls_lst.append(obj) 43 | 44 | if len(cls_lst) == 0: 45 | raise ImportError( 46 | f'There is no implementation of {name}. This is an implementation' 47 | ' error. 
Please report this issue!', 48 | ) 49 | 50 | return cls_lst if return_multi else cls_lst[0] 51 | 52 | 53 | def synchronized(wrapped): 54 | lock = RLock() 55 | 56 | @wraps(wrapped) 57 | def _wrapper(*args, **kwargs): 58 | with lock: 59 | return wrapped(*args, **kwargs) 60 | return _wrapper 61 | 62 | 63 | def git_clone(url, dir, branch=None): 64 | repo = Repo.clone_from(url=url, to_path=dir) 65 | if branch is not None: 66 | repo.git.checkout(branch) 67 | return repo 68 | 69 | 70 | def get_textlsp_name(): 71 | return 'textLSP' 72 | 73 | 74 | def get_textlsp_version(): 75 | return version(get_textlsp_name()) 76 | 77 | 78 | def get_user_cache(app_name=None): 79 | if app_name is None: 80 | app_name = get_textlsp_name() 81 | return user_cache_dir(app_name) 82 | 83 | 84 | def batch_text(text: str, pattern: re.Pattern, max_size: int, min_size: int = 0): 85 | sidx = 0 86 | eidx = max_size 87 | text_len = len(text) 88 | while eidx <= text_len: 89 | matches = list( 90 | pattern.finditer( 91 | text[sidx:eidx] 92 | ) 93 | ) 94 | if len(matches) > 0 and matches[-1].end() > min_size: 95 | eidx = sidx + matches[-1].end() 96 | 97 | yield text[sidx:eidx] 98 | sidx = eidx 99 | eidx = sidx + max_size 100 | 101 | if sidx < text_len: 102 | yield text[sidx:text_len] 103 | 104 | 105 | def position_to_tuple(position: Position): 106 | return (position.line, position.character) 107 | 108 | 109 | def traverse_tree(tree): 110 | cursor = tree.walk() 111 | 112 | reached_root = False 113 | while not reached_root: 114 | yield cursor.node 115 | 116 | if cursor.goto_first_child(): 117 | continue 118 | 119 | if cursor.goto_next_sibling(): 120 | continue 121 | 122 | retracing = True 123 | while retracing: 124 | if not cursor.goto_parent(): 125 | retracing = False 126 | reached_root = True 127 | 128 | if cursor.goto_next_sibling(): 129 | retracing = False 130 | -------------------------------------------------------------------------------- /textLSP/workspace.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import Optional, Dict 4 | 5 | from lsprotocol.types import ( 6 | TextDocumentContentChangeEvent, 7 | VersionedTextDocumentIdentifier, 8 | ) 9 | from pygls.workspace import Workspace, TextDocument 10 | 11 | from .documents.document import DocumentTypeFactory 12 | from .analysers.handler import AnalyserHandler 13 | from .utils import merge_dicts 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class TextLSPWorkspace(Workspace): 19 | def __init__(self, analyser_handler: AnalyserHandler, settings: Dict, *args, **kwargs): 20 | super().__init__(*args, **kwargs) 21 | self.analyser_handler = analyser_handler 22 | self.settings = settings 23 | 24 | def _create_text_document( 25 | self, 26 | doc_uri: str, 27 | source: Optional[str] = None, 28 | version: Optional[int] = None, 29 | language_id: Optional[str] = None, 30 | ) -> TextDocument: 31 | return DocumentTypeFactory.get_document( 32 | doc_uri=doc_uri, 33 | config=self.settings, 34 | source=source, 35 | version=version, 36 | language_id=language_id, 37 | sync_kind=self._sync_kind 38 | ) 39 | 40 | @staticmethod 41 | def workspace2textlspworkspace( 42 | workspace: Workspace, 43 | analyser_handler: AnalyserHandler, 44 | settings: Optional[Dict] = None, 45 | ): 46 | if settings is None: 47 | settings = dict() 48 | return TextLSPWorkspace( 49 | analyser_handler=analyser_handler, 50 | settings=settings, 51 | root_uri=workspace._root_uri, 52 | sync_kind=workspace._sync_kind, 53 | workspace_folders=[folder 
for folder in workspace._folders.values()], 54 | ) 55 | 56 | def update_settings(self, settings): 57 | if settings is None: 58 | return 59 | 60 | self.settings = merge_dicts(self.settings, settings) 61 | 62 | def update_text_document( 63 | self, 64 | text_doc: VersionedTextDocumentIdentifier, 65 | change: TextDocumentContentChangeEvent 66 | ): 67 | doc = self._text_documents[text_doc.uri] 68 | self.analyser_handler.update_document(doc, change) 69 | super().update_text_document(text_doc, change) 70 | --------------------------------------------------------------------------------
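A minimal usage sketch (not part of the repository) of TokenDiff.token_level_diff from textLSP/types.py, the helper that the ollama and openai analysers above use to turn a model's corrected text into diagnostics and quick-fix actions:

from textLSP.types import TokenDiff

# Diff an original paragraph against the corrected version returned by a model.
for diff in TokenDiff.token_level_diff(
    'This is a sentense.',
    'This is a sentence.',
):
    # Prints: replace: sentense. -> sentence. (10, 9)
    print(diff)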