├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── clj ├── .gitignore ├── clj │ └── cljsitter.clj └── deps.edn ├── make.sh ├── native ├── .gitignore ├── CMakeLists.txt └── src │ ├── java_api.c │ └── tests.c ├── pom.xml ├── src ├── main │ ├── java │ │ └── jsitter │ │ │ └── interop │ │ │ ├── JSitter.java │ │ │ └── LibLoader.java │ └── kotlin │ │ └── jsitter │ │ ├── api │ │ ├── ReducingZipper.kt │ │ └── api.kt │ │ └── impl │ │ ├── Cleaner.kt │ │ ├── SubtreeAccess.kt │ │ ├── TSLanguage.kt │ │ ├── TSParser.kt │ │ ├── TSTextInput.kt │ │ ├── TSTree.kt │ │ └── TSZipper.kt └── test │ └── kotlin │ └── jsitter │ └── test │ └── tests.kt └── testData └── router_go /.gitignore: -------------------------------------------------------------------------------- 1 | *.cbp 2 | .projectile 3 | .idea/ 4 | .vscode/ 5 | .project 6 | .settings 7 | .classpath 8 | target/ 9 | 10 | CMakeCache.txt 11 | cmake_install.cmake 12 | .DS_Store 13 | 14 | jsitter.iml 15 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "native/tree-sitter"] 2 | path = native/tree-sitter 3 | url = git@github.com:JetBrains/tree-sitter.git 4 | [submodule "native/grammars/tree-sitter-go"] 5 | path = native/grammars/tree-sitter-go 6 | url = git@github.com:JetBrains/tree-sitter-go.git 7 | [submodule "native/grammars/tree-sitter-json"] 8 | path = native/grammars/tree-sitter-json 9 | url = git@github.com:tree-sitter/tree-sitter-json.git 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2010-2019 JetBrains s.r.o 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![JetBrains team project](https://jb.gg/badges/team.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub) 2 | 3 | # Rational 4 | 5 | *tree-sitter* is a nice piece of parser technology. 6 | 7 | * Automatic error recovery 8 | * Incremental reparse 9 | * Immutable persistent trees, with structural sharing 10 | 11 | This features makes it perfect for use in IDEs and editors. 12 | However it is not obvious how we could take advantage over it on JVM platform. 13 | 14 | Here we have options: 15 | 16 | * Port it to Java. It requires too much work and probably some loss of performance. 17 | * Wrap tree nodes with Java objects. Leads to significant memory pressure and redundancy. 18 | * Navigate throught trees in native memory using Zippers. 19 | 20 | API with Zippers is the solution taken. 21 | Zipper gives us a nice abstraction over the place-in-tree (with ability to ascend) without compromising immutability of the tree itself. 22 | Zippers are implemented by accessing internal TreeSitter data structures directly using sun.misc.Unsafe instrinsics and don't do any JNI calls and don't consume any additional off-heap memory. 23 | 24 | Here is some discussion regarding the TreeSitter API: 25 | https://github.com/tree-sitter/tree-sitter/pull/360#issuecomment-501686115 26 | 27 | # Build Instructions 28 | 29 | ``` 30 | $ git submodule update --init --recursive 31 | $ ./make.sh 32 | $ mvn install 33 | ``` 34 | -------------------------------------------------------------------------------- /clj/.gitignore: -------------------------------------------------------------------------------- 1 | *.cp 2 | *.libs 3 | .nrepl-port 4 | .cpcache/ -------------------------------------------------------------------------------- /clj/clj/cljsitter.clj: -------------------------------------------------------------------------------- 1 | (ns cljsitter 2 | (:import [jsitter.api Parser Language Tree Node Zipper NodeType Terminal])) 3 | 4 | (defn node-type [^Node node] 5 | (when node (.getType node))) 6 | 7 | (defn byte-size [^Node node] 8 | (.getByteSize node)) 9 | 10 | (defn root [^Tree tree] 11 | (when tree (.getRoot tree))) 12 | 13 | (defn zipper [^Node node] 14 | (when node (.zipper node))) 15 | 16 | (defn byte-offset [^Zipper z] 17 | (.getByteOffset z)) 18 | 19 | (defn byte-range [z] 20 | (let [o (byte-offset z)] 21 | [o (+ o (byte-size z))])) 22 | 23 | (defn node [^Zipper z] 24 | (when z (.getNode z))) 25 | 26 | (defn up [^Zipper z] 27 | (when z (.up z))) 28 | 29 | (defn down [^Zipper z] 30 | (when z (.down z))) 31 | 32 | (defn right [^Zipper z] 33 | (when z (.right z))) 34 | 35 | (defn left [^Zipper z] 36 | (when z (.left z))) 37 | 38 | (defn next [^Zipper z] 39 | (when z (.next z))) 40 | 41 | (defn skip [^Zipper z] 42 | (when z (.skip z))) 43 | 44 | (defn alias [^Zipper z] 45 | (when z (.getAlias z))) 46 | 47 | (defn node-type-name [^NodeType t] 48 | (.getName t)) 49 | 50 | (defn terminal? [t] 51 | (cond 52 | (instance? NodeType t) (instance? Terminal t) 53 | (instance? Tree t) (recur (.getRoot ^Tree t)) 54 | (instance? Node t) (recur (.getType ^Node t)))) 55 | 56 | (defn- s-expr-impl [^Zipper z] 57 | (let [node-type ^NodeType (or (.getAlias z) 58 | (.getType (.getNode z)))] 59 | (if (instance? Terminal node-type) 60 | (.getName node-type) 61 | (lazy-seq (let [child (.down z)] 62 | (apply list (symbol (.getName node-type)) 63 | (map s-expr-impl (take-while some? (iterate #(.right ^Zipper %) child))))))))) 64 | 65 | (defn s-expr [x] 66 | (cond 67 | (instance? Zipper x) (s-expr-impl x) 68 | (instance? Tree x) (recur (.getRoot ^Tree x)) 69 | (instance? Node x) (recur (.zipper ^Node x)) 70 | :else nil)) 71 | 72 | (defn parse-str [lang-or-parser s] 73 | (if (instance? Language lang-or-parser) 74 | (recur (.parser ^Language lang-or-parser) s) 75 | (.parse ^Parser lang-or-parser (jsitter.api.StringText. s) nil nil))) 76 | 77 | -------------------------------------------------------------------------------- /clj/deps.edn: -------------------------------------------------------------------------------- 1 | {:deps {com.jetbrains.jsitter/jsitter {:mvn/version "0.1-SNAPSHOT"} 2 | com.clojure-goes-fast/clj-async-profiler {:mvn/version "0.3.1"} 3 | nrepl/nrepl {:mvn/version "0.5.3"}} 4 | :aliases {:profile {:jvm-opts ["-Djdk.attach.allowAttachSelf" 5 | "-XX:+UnlockDiagnosticVMOptions" 6 | "-XX:+DebugNonSafepoints" 7 | "-XX:NativeMemoryTracking=summary"]} 8 | :debug {:jvm-opts ["-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=1044"]}} 9 | :paths ["clj"]} 10 | -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd "$( dirname "${BASH_SOURCE[0]}" )/native" 4 | 5 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 6 | mkdir -p build/linux-x86-64 7 | BUILD_DIR=build/linux-x86-64 8 | elif [[ "$OSTYPE" == "darwin"* ]]; then 9 | mkdir -p build/darwin 10 | BUILD_DIR=build/darwin 11 | fi 12 | 13 | cmake -B $BUILD_DIR 14 | 15 | cd $BUILD_DIR 16 | make -------------------------------------------------------------------------------- /native/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | xcode/ 3 | -------------------------------------------------------------------------------- /native/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8 FATAL_ERROR) 2 | project(JSitter) 3 | 4 | set(CMAKE_BUILD_TYPE Release) 5 | 6 | find_package(JNI REQUIRED) 7 | 8 | SET(CMAKE_C_COMPILER "/usr/bin/clang") 9 | SET(CMAKE_C_FLAGS "-Wall -Ofast -g") 10 | 11 | include_directories(src/) 12 | include_directories(tree-sitter/lib/src tree-sitter/lib/include tree-sitter/lib/utf8proc) 13 | include_directories(${JNI_INCLUDE_DIRS}) 14 | 15 | add_library(jsitter SHARED src/java_api.c tree-sitter/lib/src/lib.c) 16 | 17 | add_library(tsgo SHARED grammars/tree-sitter-go/src/parser.c) 18 | 19 | add_executable(jsitter-tests src/tests.c) 20 | target_link_libraries(jsitter-tests jsitter tsgo) 21 | -------------------------------------------------------------------------------- /native/src/java_api.c: -------------------------------------------------------------------------------- 1 | /* DO NOT EDIT THIS FILE - it is machine generated */ 2 | #include 3 | #include 4 | #include 5 | #include "subtree.h" 6 | 7 | /* Header for class jsitter_interop_JSitter */ 8 | /* 9 | * Class: jsitter_interop_JSitter 10 | * Method: getSymbolName 11 | * Signature: (JS)Ljava/lang/String; 12 | */ 13 | JNIEXPORT jstring JNICALL Java_jsitter_interop_JSitter_getSymbolName 14 | (JNIEnv *env, jclass cl, jlong language_ptr, jint ts_symbol) { 15 | const char *name = ts_language_symbol_name((TSLanguage *)language_ptr, (TSSymbol)ts_symbol); 16 | return (*env)->NewStringUTF(env, name); 17 | } 18 | 19 | /* 20 | * Class: jsitter_interop_JSitter 21 | * Method: isTerminal 22 | * Signature: (JS)Z 23 | */ 24 | JNIEXPORT jboolean JNICALL Java_jsitter_interop_JSitter_isTerminal 25 | (JNIEnv *env, jclass class, jlong language_ptr, jint ts_symbol) { 26 | return ts_language_symbol_type((TSLanguage*)language_ptr, (TSSymbol)ts_symbol) == TSSymbolTypeAnonymous; 27 | } 28 | 29 | JNIEXPORT jboolean JNICALL JavaCritical_jsitter_interop_JSitter_isTerminal 30 | (jlong language_ptr, jint ts_symbol) { 31 | return ts_language_symbol_type((TSLanguage*)language_ptr, (TSSymbol)ts_symbol) == TSSymbolTypeAnonymous; 32 | } 33 | 34 | /* 35 | * Class: jsitter_interop_JSitter 36 | * Method: getSymbolByName 37 | * Signature: (JLjava/lang/String;)S 38 | */ 39 | JNIEXPORT jint JNICALL Java_jsitter_interop_JSitter_getSymbolByName 40 | (JNIEnv *env, jclass class, jlong language_ptr, jstring name) { 41 | jboolean copy; 42 | const char *chars = (*env)->GetStringUTFChars(env, name, ©); 43 | TSSymbol symbol = ts_language_symbol_for_name((TSLanguage *)language_ptr, chars); 44 | if (copy) { 45 | (*env)->ReleaseStringUTFChars(env, name, chars); 46 | } 47 | return symbol; 48 | } 49 | 50 | JNIEXPORT void JNICALL Java_jsitter_interop_JSitter_releaseSubtree 51 | (JNIEnv *env , jclass class, jlong subtree_ptr) { 52 | SubtreePool pool = ts_subtree_pool_new(0); 53 | ts_subtree_release(&pool, *(Subtree *)(&subtree_ptr)); 54 | ts_subtree_pool_delete(&pool); 55 | } 56 | 57 | JNIEXPORT void JNICALL Java_jsitter_interop_JSitter_retainSubtree 58 | (JNIEnv *env , jclass class, jlong subtree_ptr) { 59 | ts_subtree_retain(*(Subtree *)(&subtree_ptr)); 60 | } 61 | 62 | /* 63 | * Class: jsitter_interop_JSitter 64 | * Method: releaseTree 65 | * Signature: (J)V 66 | */ 67 | JNIEXPORT void JNICALL Java_jsitter_interop_JSitter_releaseTree 68 | (JNIEnv *env , jclass class, jlong tree_ptr) { 69 | ts_tree_delete((TSTree *)tree_ptr); 70 | } 71 | 72 | /* 73 | * Class: jsitter_interop_JSitter 74 | * Method: releaseParser 75 | * Signature: (J)V 76 | */ 77 | JNIEXPORT void JNICALL Java_jsitter_interop_JSitter_releaseParser 78 | (JNIEnv *env , jclass class, jlong parser_ptr) { 79 | ts_parser_delete((TSParser *)parser_ptr); 80 | } 81 | 82 | struct Input { 83 | JNIEnv *env; 84 | jobject input; 85 | jmethodID read_mtd; 86 | const char *reading_addr; 87 | }; 88 | 89 | const char *input_jni_read(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read) { 90 | struct Input *input = (struct Input *)payload; 91 | jint read = (*input->env)->CallIntMethod(input->env, input->input, input->read_mtd, byte_index); 92 | *bytes_read = read; 93 | return input->reading_addr; 94 | } 95 | 96 | /* 97 | * Class: jsitter_interop_JSitter 98 | * Method: parse 99 | * Signature: (JJLjsitter/interop/JSitter/Input;JIII)J 100 | */ 101 | JNIEXPORT jlong JNICALL Java_jsitter_interop_JSitter_parse 102 | (JNIEnv *env, jclass class, jlong parser_ptr, jlong old_tree_ptr, jobject input, jint encoding, jobject reading_buffer) { 103 | static jmethodID read_mtd = 0; 104 | if (read_mtd == 0) { 105 | jclass input_class = (*env)->FindClass(env, "jsitter/interop/JSitter$Input"); 106 | read_mtd = (*env)->GetMethodID(env, input_class, "read", "(I)I"); 107 | } 108 | void *reading_addr = (*env)->GetDirectBufferAddress(env, reading_buffer); 109 | struct Input input_ctx; 110 | input_ctx.env = env; 111 | input_ctx.input = input; 112 | input_ctx.read_mtd = read_mtd; 113 | input_ctx.reading_addr = (const char *)reading_addr; 114 | 115 | TSInput ts_input; 116 | ts_input.encoding = (TSInputEncoding)encoding; 117 | ts_input.payload = &input_ctx; 118 | ts_input.read = &input_jni_read; 119 | 120 | TSTree *old_tree = (TSTree *)old_tree_ptr; 121 | TSParser *parser = (TSParser *)parser_ptr; 122 | 123 | TSTree *new_tree = ts_parser_parse(parser, old_tree, ts_input); 124 | return (jlong)new_tree; 125 | } 126 | 127 | JNIEXPORT void JNICALL Java_jsitter_interop_JSitter_editTree 128 | (JNIEnv *env, jclass class, jlong tree_ptr, jint start_byte, jint old_end_byte, jint new_end_byte) { 129 | TSInputEdit edit; 130 | edit.start_byte = start_byte; 131 | edit.old_end_byte = old_end_byte; 132 | edit.new_end_byte = new_end_byte; 133 | ts_tree_edit((TSTree *)tree_ptr, &edit); 134 | } 135 | 136 | JNIEXPORT jlong JNICALL Java_jsitter_interop_JSitter_editSubtree 137 | (JNIEnv *env, jclass class, jlong subtree_ptr, jint start_byte, jint old_end_byte, jint new_end_byte) { 138 | Subtree subtree = *(Subtree *)(&subtree_ptr); 139 | TSInputEdit edit; 140 | edit.start_byte = start_byte; 141 | edit.old_end_byte = old_end_byte; 142 | edit.new_end_byte = new_end_byte; 143 | 144 | SubtreePool pool = ts_subtree_pool_new(0); 145 | Subtree result = ts_subtree_edit(subtree, &edit, &pool); 146 | ts_subtree_pool_delete(&pool); 147 | return *((jlong *)&result); 148 | } 149 | 150 | 151 | JNIEXPORT jlong JNICALL Java_jsitter_interop_JSitter_copyTree 152 | (JNIEnv *env, jclass class, jlong tree_ptr) { 153 | return (jlong)ts_tree_copy((TSTree *)tree_ptr); 154 | } 155 | 156 | JNIEXPORT jintArray JNICALL Java_jsitter_interop_JSitter_getChangedRanges 157 | (JNIEnv *env, jclass class, jlong edited_tree_ptr, jlong new_tree_ptr) { 158 | uint32_t length; 159 | TSRange *ranges = ts_tree_get_changed_ranges((TSTree *)edited_tree_ptr, (TSTree *)new_tree_ptr, &length); 160 | if (length == 0) { 161 | return NULL; 162 | } else { 163 | uint32_t *res = (uint32_t*)malloc(2 * length); 164 | for (uint32_t i = 0; i < length; ++i) { 165 | res[2*i] = ranges[i].start_byte; 166 | res[2*i + 1] = ranges[i].end_byte; 167 | } 168 | 169 | jintArray result = (*env)->NewIntArray(env, 2 * length); 170 | (*env)->SetIntArrayRegion(env, result, 0, 2 * length, (const jint*)res); 171 | return result; 172 | } 173 | } 174 | 175 | JNIEXPORT void JNICALL Java_jsitter_interop_JSitter_parserReset 176 | (JNIEnv * env, jclass class, jlong parser_ptr) { 177 | ts_parser_reset((TSParser *)parser_ptr); 178 | } 179 | 180 | /* 181 | * Class: jsitter_interop_JSitter 182 | * Method: newParser 183 | * Signature: (J)J 184 | */ 185 | JNIEXPORT jlong JNICALL Java_jsitter_interop_JSitter_newParser 186 | (JNIEnv * env, jclass class, jlong language_ptr, jlong cancellationFlagPtr) { 187 | TSParser *parser = ts_parser_new(); 188 | ts_parser_set_cancellation_flag(parser, (size_t *)cancellationFlagPtr); 189 | ts_parser_set_language(parser, (TSLanguage *)language_ptr); 190 | return (jlong) parser; 191 | } 192 | 193 | JNIEXPORT jlong JNICALL JavaCritical_jsitter_interop_JSitter_newParser 194 | (jlong language_ptr) { 195 | TSParser *parser = ts_parser_new(); 196 | ts_parser_set_language(parser, (TSLanguage *)language_ptr); 197 | return (jlong) parser; 198 | } 199 | 200 | -------------------------------------------------------------------------------- /native/src/tests.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | //#include "java_api.cpp" 6 | #include 7 | 8 | TSLanguage *tree_sitter_json(); 9 | TSLanguage *tree_sitter_go(); 10 | 11 | bool ts_zipper_next(TSZipper *zipper, TSZipper *res, TSLanguage *lang) { 12 | bool down = ts_zipper_down(zipper, res, lang); 13 | if (down) { 14 | return true; 15 | } 16 | bool right = ts_zipper_right(zipper, res); 17 | if (right) { 18 | return true; 19 | } 20 | while ((zipper = ts_zipper_up(zipper))) { 21 | if (ts_zipper_right(zipper, res)) { 22 | return true; 23 | } 24 | } 25 | return false; 26 | } 27 | 28 | void perf() { 29 | FILE *f = fopen("/Users/jetzajac/Projects/jsitter/testData/router_go", "r"); 30 | fseek (f, 0, SEEK_END); 31 | size_t s = ftell(f); 32 | rewind(f); 33 | void *b = malloc(s); 34 | fread(b, s, 1, f); 35 | TSParser *parser = ts_parser_new(); 36 | ts_parser_set_language(parser, tree_sitter_go()); 37 | struct timespec start, end; 38 | clock_gettime(CLOCK_MONOTONIC_RAW, &start); 39 | TSTree *tree = ts_parser_parse_string(parser, 40 | NULL, 41 | (const char *)b, 42 | s); 43 | clock_gettime(CLOCK_MONOTONIC_RAW, &end); 44 | uint64_t delta_us = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_nsec - start.tv_nsec) / 1000; 45 | printf("took %llu\n", delta_us); 46 | } 47 | 48 | void ranges() { 49 | TSParser *parser = ts_parser_new(); 50 | TSLanguage *lang = tree_sitter_go(); 51 | ts_parser_set_language(parser, lang); 52 | const char *str_go = "func hello() { sayHello() }"; 53 | TSTree *tree = ts_parser_parse_string( 54 | parser, 55 | NULL, 56 | str_go, 57 | strlen(str_go) 58 | ); 59 | const char *str_go2 = "func hello() { sayHello }"; 60 | TSInputEdit e; // + 8 61 | e.start_byte = 15 + 8; 62 | e.old_end_byte = 15 + 8 + 2; 63 | e.new_end_byte = 15 + 8; 64 | ts_tree_edit(tree, &e); 65 | TSTree *tree2 = ts_parser_parse_string(parser, tree, str_go2, strlen(str_go2)); 66 | uint32_t len; 67 | 68 | 69 | TSRange *ranges = ts_tree_get_changed_ranges(tree, tree2, &len); 70 | 71 | char *new_string = ts_node_string(ts_tree_root_node(tree2)); 72 | printf("New Syntax tree: %s\n", new_string); 73 | char *old_string = ts_node_string(ts_tree_root_node(tree)); 74 | printf("Old Syntax tree: %s\n", old_string); 75 | 76 | const char *str_go3 = "func hello() { sayHello() }"; 77 | TSInputEdit e2; // + 8 78 | e2.start_byte = 15 + 8; 79 | e2.new_end_byte = 15 + 8 + 2; 80 | e2.old_end_byte = 15 + 8; 81 | ts_tree_edit(tree2, &e2); 82 | TSTree *tree3 = ts_parser_parse_string(parser, tree2, str_go3, strlen(str_go3)); 83 | uint32_t len2; 84 | 85 | 86 | TSRange *ranges2 = ts_tree_get_changed_ranges(tree, tree2, &len2); 87 | 88 | char *new_string3 = ts_node_string(ts_tree_root_node(tree3)); 89 | printf("New Syntax tree: %s\n", new_string3); 90 | } 91 | 92 | void str_insert(size_t index, char c, char *str, size_t size) { 93 | memmove(str + index + 1, str + index, size - index); 94 | str[index] = c; 95 | } 96 | 97 | void test_crash() { 98 | FILE *f = fopen("/Users/jetzajac/Projects/jsitter/testData/router_go", "r"); 99 | fseek (f, 0, SEEK_END); 100 | size_t s = ftell(f); 101 | rewind(f); 102 | void *b = malloc(s + 1000); 103 | fread(b, s, 1, f); 104 | TSParser *parser = ts_parser_new(); 105 | ts_parser_set_language(parser, tree_sitter_go()); 106 | TSTree *tree = ts_parser_parse_string(parser, 107 | NULL, 108 | (const char *)b, 109 | s); 110 | TSTree *copy = ts_tree_copy(tree); 111 | //for (int i = 0; i < 100; ++i) { 112 | int i = 2; { 113 | printf("i = %d\n", i); 114 | //str_insert(1000 + i, 'x', b, s + i); 115 | TSTree *copy = ts_tree_copy(tree); 116 | TSInputEdit e; // + 8 117 | e.start_byte = 2000; 118 | e.old_end_byte = 2000; 119 | e.new_end_byte = 2000 + i; 120 | ts_tree_edit(copy, &e); 121 | TSTree *new_tree = ts_parser_parse_string(parser, copy, b, s); 122 | ts_tree_delete(copy); 123 | ts_tree_delete(new_tree); 124 | ts_parser_reset(parser); 125 | } 126 | } 127 | 128 | 129 | int main () { 130 | // ranges(); 131 | test_crash(); 132 | //perf(); 133 | return 0; 134 | } 135 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | 7 | com.jetbrains.jsitter 8 | jsitter 9 | 0.1-SNAPSHOT 10 | 11 | jsitter 12 | 13 | http://www.example.com 14 | 15 | 16 | UTF-8 17 | 1.8 18 | 1.8 19 | 1.3.60 20 | 21 | 22 | 23 | 24 | junit 25 | junit 26 | 4.13.1 27 | test 28 | 29 | 30 | org.jetbrains.kotlin 31 | kotlin-stdlib 32 | ${kotlin.version} 33 | 34 | 35 | io.lacuna 36 | bifurcan 37 | 0.1.0 38 | 39 | 40 | net.java.dev.jna 41 | jna 42 | 5.4.0 43 | 44 | 45 | 46 | 47 | ${project.basedir}/src/main/java 48 | ${project.basedir}/src/test/kotlin 49 | 50 | 51 | org.jetbrains.kotlin 52 | kotlin-maven-plugin 53 | ${kotlin.version} 54 | 55 | 1.8 56 | 57 | 58 | 59 | compile 60 | 61 | compile 62 | 63 | 64 | 65 | ${project.basedir}/src/main/kotlin 66 | ${project.basedir}/src/main/java 67 | 68 | 69 | 70 | 71 | 72 | test-compile 73 | 74 | test-compile 75 | 76 | 77 | 78 | ${project.basedir}/src/test/kotlin 79 | 80 | 81 | 82 | 83 | 84 | 85 | org.apache.maven.plugins 86 | maven-compiler-plugin 87 | 3.5.1 88 | 89 | 90 | 91 | default-compile 92 | none 93 | 94 | 95 | 96 | default-testCompile 97 | none 98 | 99 | 100 | java-compile 101 | compile 102 | 103 | compile 104 | 105 | 106 | 107 | java-test-compile 108 | test-compile 109 | 110 | testCompile 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | native/build/ 120 | 121 | 122 | 123 | darwin/libtsgo.dylib 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | native/build/ 133 | 134 | 135 | 136 | darwin/libjsitter.dylib 137 | 138 | 139 | linux-x86-64/libjsitter.so 140 | 141 | 142 | win32-x86-64/jsitter.dll 143 | 144 | 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /src/main/java/jsitter/interop/JSitter.java: -------------------------------------------------------------------------------- 1 | package jsitter.interop; 2 | 3 | import com.sun.jna.Function; 4 | import com.sun.jna.Native; 5 | import com.sun.jna.NativeLibrary; 6 | import org.jetbrains.annotations.NotNull; 7 | import org.jetbrains.annotations.Nullable; 8 | 9 | import java.io.File; 10 | import java.nio.ByteBuffer; 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | 14 | public class JSitter { 15 | 16 | public static List retainedLibs = new ArrayList(); 17 | 18 | public static long loadLang(String fnName, String libnameOrPath, ClassLoader loader) { 19 | NativeLibrary instance = NativeLibrary.getInstance(libnameOrPath, loader); 20 | retainedLibs.add(instance); 21 | Function function = instance.getFunction(fnName); 22 | return function.invokeLong(new Object[]{}); 23 | } 24 | 25 | static { 26 | try { 27 | File jsitter = Native.extractFromResourcePath("jsitter", JSitter.class.getClassLoader()); 28 | System.load(jsitter.getAbsolutePath()); 29 | } catch (Throwable e) { 30 | throw new RuntimeException(e); 31 | } 32 | } 33 | 34 | public static native void releaseSubtree(long subtree); 35 | 36 | public static native void retainSubtree(long subtree); 37 | 38 | public interface Input { 39 | int read(int byteOffset); 40 | } 41 | 42 | public static native String getSymbolName(long languagePtr, int symbol); 43 | 44 | public static native boolean isTerminal(long languagePtr, int symbol); 45 | 46 | public static native int getSymbolByName(long languagePtr, String name); 47 | 48 | public static native void releaseTree(long treePtr); 49 | 50 | public static native void releaseParser(long parserPtr); 51 | 52 | public static native long parse(long parserPtr, 53 | long editedTreePtr, 54 | @NotNull Input input, 55 | int encoding, 56 | ByteBuffer readingBuffer); 57 | 58 | public static native long copyTree(long treePtr); 59 | 60 | public static native void editTree(long treePtr, int startByte, int oldEndByte, int newEndByte); 61 | 62 | public static native long editSubtree(long subtree, int startByte, int oldEndByte, int newEndByte); 63 | 64 | @Nullable 65 | public static native int[] getChangedRanges(long editedTreePtr, long newTreePtr); 66 | 67 | public static native long newParser(long languagePtr, long cancellationFlagPtr); 68 | 69 | public static native void parserReset(long parserPtr); 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/jsitter/interop/LibLoader.java: -------------------------------------------------------------------------------- 1 | package jsitter.interop; 2 | 3 | import java.io.*; 4 | import java.nio.file.FileSystemNotFoundException; 5 | import java.nio.file.FileSystems; 6 | import java.nio.file.Files; 7 | import java.nio.file.ProviderNotFoundException; 8 | import java.nio.file.StandardCopyOption; 9 | 10 | /** 11 | * A simple library class which helps with loading dynamic libraries stored in the 12 | * JAR archive. These libraries usually contain implementation of some methods in 13 | * native code (using JNI - Java Native Interface). 14 | * 15 | * @see http://adamheinrich.com/blog/2012/how-to-load-native-jni-library-from-jar 16 | * @see https://github.com/adamheinrich/native-utils 17 | * 18 | */ 19 | public class LibLoader { 20 | 21 | /** 22 | * The minimum length a prefix for a file has to have according to {@link File#createTempFile(String, String)}}. 23 | */ 24 | private static final int MIN_PREFIX_LENGTH = 3; 25 | public static final String NATIVE_FOLDER_PATH_PREFIX = "nativeutils"; 26 | 27 | /** 28 | * Temporary directory which will contain the DLLs. 29 | */ 30 | private static File temporaryDir; 31 | 32 | /** 33 | * Private constructor - this class will never be instanced 34 | */ 35 | private LibLoader() { 36 | } 37 | 38 | /** 39 | * Loads library from current JAR archive 40 | * 41 | * The file from JAR is copied into system temporary directory and then loaded. The temporary file is deleted after 42 | * exiting. 43 | * Method uses String as filename because the pathname is "abstract", not system-dependent. 44 | * 45 | * @param path The path of file inside JAR as absolute path (beginning with '/'), e.g. /package/File.ext 46 | * @throws IOException If temporary file creation or read/write operation fails 47 | * @throws IllegalArgumentException If source file (param path) does not exist 48 | * @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than three characters 49 | * (restriction of {@link File#createTempFile(java.lang.String, java.lang.String)}). 50 | * @throws FileNotFoundException If the file could not be found inside the JAR. 51 | */ 52 | public static void loadLibraryFromJar(String path) throws IOException { 53 | 54 | if (null == path || !path.startsWith("/")) { 55 | throw new IllegalArgumentException("The path has to be absolute (start with '/')."); 56 | } 57 | 58 | // Obtain filename from path 59 | String[] parts = path.split("/"); 60 | String filename = (parts.length > 1) ? parts[parts.length - 1] : null; 61 | 62 | // Check if the filename is okay 63 | if (filename == null || filename.length() < MIN_PREFIX_LENGTH) { 64 | throw new IllegalArgumentException("The filename has to be at least 3 characters long."); 65 | } 66 | 67 | // Prepare temporary file 68 | if (temporaryDir == null) { 69 | temporaryDir = createTempDirectory(NATIVE_FOLDER_PATH_PREFIX); 70 | temporaryDir.deleteOnExit(); 71 | } 72 | 73 | File temp = new File(temporaryDir, filename); 74 | 75 | try (InputStream is = LibLoader.class.getResourceAsStream(path)) { 76 | Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); 77 | } catch (IOException e) { 78 | temp.delete(); 79 | throw e; 80 | } catch (NullPointerException e) { 81 | temp.delete(); 82 | throw new FileNotFoundException("File " + path + " was not found inside JAR."); 83 | } 84 | 85 | try { 86 | System.load(temp.getAbsolutePath()); 87 | } finally { 88 | if (isPosixCompliant()) { 89 | // Assume POSIX compliant file system, can be deleted after loading 90 | temp.delete(); 91 | } else { 92 | // Assume non-POSIX, and don't delete until last file descriptor closed 93 | temp.deleteOnExit(); 94 | } 95 | } 96 | } 97 | 98 | private static boolean isPosixCompliant() { 99 | try { 100 | return FileSystems.getDefault() 101 | .supportedFileAttributeViews() 102 | .contains("posix"); 103 | } catch (FileSystemNotFoundException 104 | | ProviderNotFoundException 105 | | SecurityException e) { 106 | return false; 107 | } 108 | } 109 | 110 | private static File createTempDirectory(String prefix) throws IOException { 111 | String tempDir = System.getProperty("java.io.tmpdir"); 112 | File generatedDir = new File(tempDir, prefix + System.nanoTime()); 113 | 114 | if (!generatedDir.mkdir()) 115 | throw new IOException("Failed to create temp directory " + generatedDir.getName()); 116 | 117 | return generatedDir; 118 | } 119 | } -------------------------------------------------------------------------------- /src/main/kotlin/jsitter/api/ReducingZipper.kt: -------------------------------------------------------------------------------- 1 | package jsitter.api 2 | 3 | fun Node.reducingZipper(init: Acc, 4 | reducer: (Acc, Zipper<*>) -> Acc): ReducingZipper = 5 | ReducingZipper(init, this.zipper(), reducer) 6 | 7 | class ReducingZipper(val parentOrInit: Any?, 8 | val z: Zipper, 9 | val reducer: (Acc, Zipper<*>) -> Acc) : Zipper { 10 | override val alias: NodeType? 11 | get() = z.alias 12 | 13 | override fun up(): ReducingZipper<*, Acc>? { 14 | if (parentOrInit is ReducingZipper<*, *>) { 15 | return parentOrInit as ReducingZipper<*, Acc> 16 | } 17 | else { 18 | return null 19 | } 20 | } 21 | 22 | override fun down(): Zipper<*>? { 23 | val down = z.down() 24 | return if (down == null) { 25 | null 26 | } 27 | else { 28 | ReducingZipper( 29 | parentOrInit = this, 30 | z = down, 31 | reducer = this.reducer) 32 | } 33 | } 34 | 35 | override fun right(): Zipper<*>? { 36 | val right = z.right() 37 | return if (right == null) { 38 | null 39 | } 40 | else { 41 | ReducingZipper( 42 | parentOrInit = this.parentOrInit, 43 | z = right, 44 | reducer = this.reducer) 45 | } 46 | } 47 | 48 | override fun downRight(): Zipper<*>? { 49 | val right = z.downRight() 50 | return if (right == null) { 51 | null 52 | } 53 | else { 54 | ReducingZipper( 55 | parentOrInit = this.parentOrInit, 56 | z = right, 57 | reducer = this.reducer) 58 | } 59 | } 60 | 61 | 62 | override fun left(): Zipper<*>? { 63 | val left = z.left() 64 | return if (left == null) { 65 | null 66 | } 67 | else { 68 | ReducingZipper( 69 | parentOrInit = this.parentOrInit, 70 | z = left, 71 | reducer = this.reducer) 72 | } 73 | } 74 | 75 | override fun retainSubtree(): Node = 76 | z.retainSubtree() 77 | 78 | override val node: Node 79 | get() = this.z.node 80 | 81 | override val byteOffset: Int 82 | get() = this.z.byteOffset 83 | 84 | val acc: Acc by lazy { 85 | val parentAcc = if (this.parentOrInit is ReducingZipper<*, *>) { 86 | (this.parentOrInit as ReducingZipper<*, Acc>).acc 87 | } 88 | else { 89 | this.parentOrInit as Acc 90 | } 91 | this.reducer(parentAcc, this.z) 92 | } 93 | } -------------------------------------------------------------------------------- /src/main/kotlin/jsitter/api/api.kt: -------------------------------------------------------------------------------- 1 | package jsitter.api 2 | 3 | import jsitter.impl.TSLanguage 4 | import jsitter.interop.JSitter 5 | import java.nio.ByteBuffer 6 | 7 | interface Node { 8 | val language: Language<*> 9 | val type: T 10 | val byteSize: Int 11 | fun zipper(): Zipper 12 | val padding: Int 13 | } 14 | 15 | interface Tree { 16 | fun adjust(edits: List): Tree 17 | val root: Node 18 | val actual: Boolean 19 | } 20 | 21 | interface Zipper { 22 | fun up(): Zipper<*>? 23 | fun down(): Zipper<*>? 24 | fun downRight(): Zipper<*>? 25 | fun right(): Zipper<*>? 26 | fun left(): Zipper<*>? 27 | fun skip(): Zipper<*>? { 28 | var u: Zipper<*>? = this 29 | while (u != null) { 30 | val r = u.right() 31 | if (r != null) { 32 | return r 33 | } else { 34 | u = u.up() 35 | } 36 | } 37 | return null 38 | } 39 | fun next(): Zipper<*>? = down() ?: skip() 40 | 41 | fun retainSubtree(): Node 42 | val node: Node 43 | 44 | val byteOffset: Int 45 | val byteSize: Int get() = this.node.byteSize 46 | val alias: NodeType? 47 | val type: NodeType get() = this.alias ?: this.node.type 48 | val padding: Int get() = this.node.padding 49 | } 50 | 51 | data class Edit(val startByte: Int, 52 | val oldEndByte: Int, 53 | val newEndByte: Int) 54 | 55 | open class NodeType(val name: String) { 56 | internal var id: Int = -1 57 | internal var initialized = false 58 | override fun toString(): String = name 59 | } 60 | 61 | 62 | 63 | object Error : NodeType("ERROR") { 64 | init { 65 | id = -1 66 | initialized = true 67 | } 68 | } 69 | 70 | open class Terminal(name: String) : NodeType(name) 71 | 72 | interface Language { 73 | 74 | companion object { 75 | @JvmStatic 76 | fun load(rootNodeType: T, name: String, nativeFactoryFunction: String, libName: String, classLoader: ClassLoader?): Language { 77 | val languagePtr = JSitter.loadLang(nativeFactoryFunction, libName, classLoader) 78 | val res = TSLanguage( 79 | languagePtr = languagePtr, 80 | name = name, 81 | rootNodeType = rootNodeType) 82 | res.register(rootNodeType) 83 | res.register(Error) 84 | return res 85 | } 86 | } 87 | val name: String 88 | val rootNodeType: NodeType 89 | fun parser(): Parser 90 | fun nodeType(name: String): NodeType 91 | fun register(nodeType: NodeType) 92 | } 93 | 94 | enum class Encoding(val i: Int) { UTF8(0), UTF16(1) } 95 | 96 | interface Text { 97 | /* 98 | * put data into ByteBuffer up to it's limit 99 | * */ 100 | fun read(byteOffset: Int, output: ByteBuffer) 101 | 102 | val encoding: Encoding 103 | } 104 | 105 | /* 106 | * Simple implementation of Text for testing purposes 107 | */ 108 | class StringText(val str: String) : Text { 109 | override val encoding: Encoding = Encoding.UTF16 110 | 111 | override fun read(byteOffset: Int, output: ByteBuffer) { 112 | val bytes = str.toByteArray(Charsets.UTF_16LE) 113 | output.put(bytes, byteOffset, Math.min(bytes.size - byteOffset, output.limit())) 114 | } 115 | } 116 | 117 | data class BytesRange(val start: Int, 118 | val end: Int) 119 | 120 | class CancellationToken { 121 | @Volatile 122 | internal var cancelled = false 123 | @Volatile 124 | internal var handler: (() -> Unit)? = null 125 | 126 | fun isCancelled() = cancelled 127 | fun cancel() { 128 | synchronized(this) { 129 | if (!cancelled) { 130 | cancelled = true 131 | val h = handler 132 | if (h != null) { 133 | h() 134 | } 135 | } 136 | } 137 | } 138 | 139 | internal fun onCancel(f: () -> Unit) { 140 | synchronized(this) { 141 | handler = f 142 | } 143 | } 144 | } 145 | 146 | interface Parser { 147 | fun parse(text: Text, adjustedTree: Tree? = null, cancellationToken: CancellationToken?): Tree? 148 | fun parse(text: Text, adjustedTree: Tree? = null): Tree = this.parse(text, adjustedTree, null)!! 149 | val language: Language 150 | } 151 | -------------------------------------------------------------------------------- /src/main/kotlin/jsitter/impl/Cleaner.kt: -------------------------------------------------------------------------------- 1 | package jsitter.impl 2 | 3 | import java.lang.ref.PhantomReference 4 | import java.lang.ref.ReferenceQueue 5 | import java.util.concurrent.ConcurrentHashMap 6 | import java.util.concurrent.atomic.AtomicBoolean 7 | import java.util.concurrent.atomic.AtomicInteger 8 | 9 | typealias Disposer = () -> Unit 10 | interface Resource { 11 | fun disposer(): Disposer 12 | } 13 | 14 | object Cleaner { 15 | var DEBUG = false 16 | 17 | val refQueue = ReferenceQueue() 18 | val refs = ConcurrentHashMap() 19 | val debugAliveRefsCount = AtomicInteger() 20 | val started = AtomicBoolean() 21 | 22 | fun start() { 23 | val thread = Thread({ 24 | while (true) { 25 | val key = refQueue.remove() 26 | val disposer = refs.remove(key)!! 27 | debugAliveRefsCount.decrementAndGet() 28 | try { 29 | disposer() 30 | } catch(x: Throwable) { 31 | x.printStackTrace() 32 | } 33 | } 34 | }, "com.jetbrains.jsitter.cleaner") 35 | thread.isDaemon = true 36 | thread.start() 37 | } 38 | 39 | fun register(r: Resource) { 40 | val ref = PhantomReference(r, refQueue) 41 | val disposer = r.disposer() 42 | refs.put(ref, disposer) 43 | debugAliveRefsCount.incrementAndGet() 44 | if (started.compareAndSet(false, true)) { 45 | start() 46 | } 47 | } 48 | } 49 | 50 | -------------------------------------------------------------------------------- /src/main/kotlin/jsitter/impl/SubtreeAccess.kt: -------------------------------------------------------------------------------- 1 | package jsitter.impl 2 | 3 | import sun.misc.Unsafe 4 | import kotlin.experimental.and 5 | 6 | typealias Ptr = Long 7 | 8 | object SubtreeAccess { 9 | 10 | val unsafe: Unsafe = Unsafe::class.java.getDeclaredField("theUnsafe").let { field -> 11 | field.isAccessible = true 12 | field.get(null) as Unsafe 13 | } 14 | 15 | fun isInline(subtree: Ptr) : Boolean { 16 | return subtree.and(1) == 1L 17 | } 18 | 19 | val ref_count = 0 20 | val padding = 4 21 | val size = padding + 12 22 | val lookahead_bytes = size + 12 23 | val error_cost = lookahead_bytes + 4 24 | val child_count = error_cost + 4 25 | val symbol = child_count + 4 26 | val parse_state = symbol + 2 27 | val flags = parse_state + 2 28 | 29 | val children = 48 30 | val visible_children_count = children + 8 31 | val named_child_count = visible_children_count + 4 32 | val node_count = named_child_count + 4 33 | val repeat_depth = node_count + 4 34 | val dyn_prec = repeat_depth + 4 35 | val production_id = dyn_prec + 4 36 | 37 | 38 | fun readShort(addr: Ptr) : Int { 39 | return ((unsafe.getShort(addr)).toInt()).and(0xFFFF) 40 | } 41 | 42 | fun subtreeNodeType(subtree: Ptr) : Int { 43 | if (subtree == 0L) { 44 | throw NullPointerException() 45 | } 46 | if (isInline(subtree)) { 47 | return subtree.byte(1) 48 | } else { 49 | return readShort(subtree + symbol) 50 | } 51 | } 52 | 53 | fun childCount(subtree: Ptr): Int { 54 | if (subtree == 0L) { 55 | throw NullPointerException() 56 | } 57 | if (isInline(subtree)) { 58 | return 0 59 | } else { 60 | return unsafe.getInt(subtree + child_count) 61 | } 62 | } 63 | 64 | fun childAt(subtree: Ptr, i: Int): Ptr { 65 | if (subtree == 0L) { 66 | throw NullPointerException() 67 | } 68 | if (isInline(subtree)) { 69 | throw AssertionError() 70 | } 71 | val children_ptr = unsafe.getAddress(subtree + children) 72 | return unsafe.getAddress(children_ptr + i * 8) 73 | } 74 | 75 | fun extra(subtree: Ptr): Boolean { 76 | if (subtree == 0L) { 77 | throw NullPointerException() 78 | } 79 | if (isInline(subtree)) { 80 | return subtree.byte(0).and(8) == 8 81 | } else { 82 | val flags = unsafe.getByte(subtree + flags) 83 | return flags.and(1.shl(2)) != 0.toByte() 84 | } 85 | } 86 | 87 | fun subtreeBytesSize(subtree: Ptr): Int { 88 | if (subtree == 0L) { 89 | throw NullPointerException() 90 | } 91 | if (isInline(subtree)) { 92 | return subtree.byte(3) 93 | } else { 94 | return unsafe.getInt(subtree + size) 95 | } 96 | } 97 | 98 | fun subtreeBytesPadding(subtree: Ptr): Int { 99 | if (subtree == 0L) { 100 | throw NullPointerException() 101 | } 102 | if (isInline(subtree)) { 103 | return subtree.byte(2) 104 | } else { 105 | return unsafe.getInt(subtree + padding) 106 | } 107 | } 108 | 109 | fun isVisible(subtree: Ptr): Boolean { 110 | if (subtree == 0L) { 111 | throw NullPointerException() 112 | } 113 | if (isInline(subtree)) { 114 | return subtree.byte(0).and(2) == 2 115 | } else { 116 | return unsafe.getByte(subtree + flags).and(1) == 1.toByte() 117 | } 118 | } 119 | 120 | fun productionId(subtree: Ptr): Int { 121 | if (subtree == 0L) { 122 | throw NullPointerException() 123 | } 124 | if (isInline(subtree)) { 125 | throw AssertionError() 126 | } 127 | return readShort(subtree + production_id) 128 | } 129 | 130 | fun visibleChildCount(subtree: Ptr): Int { 131 | if (subtree == 0L) { 132 | throw NullPointerException() 133 | } 134 | if (isInline(subtree)) { 135 | return 0 136 | } else { 137 | return unsafe.getInt(subtree + visible_children_count) 138 | } 139 | } 140 | 141 | fun aliasSequenceAt(aliasSequence: Ptr, structuralChildIndex: Int): Int { 142 | return readShort(aliasSequence + structuralChildIndex * 2) 143 | } 144 | 145 | fun aliasSequence(lang: Ptr, productionId: Int): Ptr { 146 | val alias_sequences_offset = 64 147 | val max_alias_sequence_length_offset = alias_sequences_offset + 8 148 | if (productionId > 0) { 149 | val maxAliasSequenceLength = readShort(lang + max_alias_sequence_length_offset) 150 | val aliasSequencesPtr = unsafe.getAddress(lang + alias_sequences_offset) 151 | return aliasSequencesPtr + 2 * productionId * maxAliasSequenceLength 152 | } else { 153 | return 0L 154 | } 155 | } 156 | 157 | fun root(treePtr: Ptr): Ptr { 158 | if (treePtr == 0L) { 159 | throw NullPointerException() 160 | } 161 | return unsafe.getAddress(treePtr) 162 | } 163 | } 164 | 165 | fun Long.byte(i: Int): Int { 166 | val l = this 167 | return when (i) { 168 | 0 -> l.and(0xFF).toInt() 169 | 1 -> l.and(0xFF00).shr(8).toInt() 170 | 2 -> l.and(0xFF0000).shr(16).toInt() 171 | 3 -> l.and(0xFF000000).shr(24).toInt() 172 | 4 -> l.and(0xFF00000000).shr(32).toInt() 173 | 5 -> l.and(0xFF0000000000).shr(40).toInt() 174 | 6 -> l.and(0xFF000000000000).shr(48).toInt() 175 | 7 -> l.shr(56).toInt() 176 | else -> throw AssertionError() 177 | } 178 | } -------------------------------------------------------------------------------- /src/main/kotlin/jsitter/impl/TSLanguage.kt: -------------------------------------------------------------------------------- 1 | package jsitter.impl 2 | 3 | import jsitter.api.* 4 | import jsitter.interop.JSitter 5 | import java.util.concurrent.ConcurrentHashMap 6 | import java.util.concurrent.ConcurrentMap 7 | 8 | typealias TSSymbol = Int 9 | 10 | class TSLanguage( 11 | val languagePtr: Ptr, 12 | override val name: String, 13 | override val rootNodeType: T, 14 | val registry: ConcurrentMap = ConcurrentHashMap(), 15 | val nodeTypesCache: ConcurrentMap = ConcurrentHashMap()) : Language { 16 | 17 | fun getNodeType(tsSymbol: TSSymbol): NodeType = 18 | nodeTypesCache.computeIfAbsent(tsSymbol) { symbol -> 19 | if (symbol.toInt() == -1) { 20 | Error 21 | } 22 | else { 23 | val name: String = JSitter.getSymbolName(languagePtr, symbol) 24 | val isTerminal: Boolean = JSitter.isTerminal(languagePtr, symbol) 25 | val nodeType = registry.computeIfAbsent(name) { name -> 26 | if (isTerminal) { 27 | Terminal(name) 28 | } 29 | else { 30 | NodeType(name) 31 | } 32 | } 33 | nodeType.id = symbol.toInt() 34 | nodeType 35 | } 36 | } 37 | 38 | fun getNodeTypeSymbol(nodeType: NodeType): TSSymbol = 39 | if (nodeType.initialized) { 40 | nodeType.id 41 | } 42 | else { 43 | val symbol: TSSymbol = JSitter.getSymbolByName(languagePtr, nodeType.name) 44 | nodeType.id = symbol 45 | nodeType.initialized = true 46 | symbol 47 | } 48 | 49 | override fun nodeType(name: String): NodeType = registry[name]!! 50 | 51 | override fun parser(): Parser { 52 | val cancellationFlagPtr = SubtreeAccess.unsafe.allocateMemory(8) 53 | return TSParser(parserPtr = JSitter.newParser(languagePtr, cancellationFlagPtr), 54 | language = this, 55 | nodeType = rootNodeType, 56 | cancellationFlagPtr = cancellationFlagPtr) 57 | } 58 | 59 | override fun register(nodeType: NodeType) { 60 | registry[nodeType.name] = nodeType 61 | } 62 | } -------------------------------------------------------------------------------- /src/main/kotlin/jsitter/impl/TSParser.kt: -------------------------------------------------------------------------------- 1 | package jsitter.impl 2 | 3 | import jsitter.api.* 4 | import jsitter.interop.JSitter 5 | import java.nio.ByteBuffer 6 | 7 | data class TSParser(val parserPtr: Ptr, 8 | override val language: TSLanguage, 9 | val nodeType: NodeType, 10 | val cancellationFlagPtr: Ptr) : Parser, Resource { 11 | val readingBuffer: ByteBuffer = ByteBuffer.allocateDirect(READING_BUFFER_CAPACITY) 12 | 13 | init { 14 | Cleaner.register(this) 15 | } 16 | 17 | override fun disposer(): Disposer { 18 | val parserPtr = this.parserPtr 19 | val cancellationFlagPtr = this.cancellationFlagPtr 20 | return { 21 | JSitter.releaseParser(parserPtr) 22 | SubtreeAccess.unsafe.freeMemory(cancellationFlagPtr) 23 | } 24 | } 25 | 26 | override fun parse(text: Text, adjustedTree: Tree?, cancellationToken: CancellationToken?): Tree? { 27 | if (adjustedTree?.actual == true) { 28 | return adjustedTree 29 | } 30 | synchronized(this) { 31 | SubtreeAccess.unsafe.putLong(this.cancellationFlagPtr, 0) 32 | cancellationToken?.onCancel { 33 | SubtreeAccess.unsafe.putLong(this.cancellationFlagPtr, 1) 34 | } 35 | if (cancellationToken?.cancelled == true) { 36 | return null 37 | } 38 | val newTreePtr = JSitter.parse( 39 | this.parserPtr, 40 | (adjustedTree as TSTree?)?.treePtr ?: 0L, 41 | TSTextInput(text, this.readingBuffer), 42 | text.encoding.i, 43 | this.readingBuffer) 44 | JSitter.parserReset(parserPtr) 45 | if (newTreePtr == 0L) { 46 | return null 47 | } 48 | if (cancellationToken?.cancelled == true) { 49 | JSitter.releaseTree(newTreePtr) 50 | return null 51 | } 52 | return TSTree( 53 | treePtr = newTreePtr, 54 | root = TSSubtree( 55 | language = this.language, 56 | lifetime = TSTreeResource(newTreePtr), 57 | subtreePtr = SubtreeAccess.root(newTreePtr)), 58 | actual = true) 59 | } 60 | } 61 | } -------------------------------------------------------------------------------- /src/main/kotlin/jsitter/impl/TSTextInput.kt: -------------------------------------------------------------------------------- 1 | package jsitter.impl 2 | 3 | import jsitter.api.Text 4 | import jsitter.interop.JSitter 5 | import java.nio.ByteBuffer 6 | 7 | const val READING_BUFFER_CAPACITY = 1024 * 1024 8 | 9 | class TSTextInput(val text: Text, 10 | val readingBuffer: ByteBuffer) : JSitter.Input { 11 | override fun read(byteOffset: Int): Int { 12 | try { 13 | text.read(byteOffset, readingBuffer) 14 | val bytesCount = readingBuffer.position() 15 | readingBuffer.rewind() 16 | return bytesCount 17 | } 18 | catch (x: Throwable) { 19 | System.err.println(x) 20 | return 0 21 | } 22 | } 23 | } -------------------------------------------------------------------------------- /src/main/kotlin/jsitter/impl/TSTree.kt: -------------------------------------------------------------------------------- 1 | @file:Suppress("UNCHECKED_CAST") 2 | 3 | package jsitter.impl 4 | 5 | import jsitter.api.* 6 | import jsitter.interop.* 7 | 8 | class TSTreeResource(val treePtr: Ptr) : Resource { 9 | init { 10 | Cleaner.register(this) 11 | } 12 | 13 | override fun disposer(): Disposer { 14 | val treePtr = this.treePtr 15 | return { 16 | JSitter.releaseTree(treePtr) 17 | } 18 | } 19 | } 20 | 21 | class TSSubtreeResource(val subtreePtr: Ptr) : Resource { 22 | init { 23 | Cleaner.register(this) 24 | } 25 | 26 | override fun disposer(): Disposer { 27 | val subtreePtr = this.subtreePtr 28 | return { 29 | JSitter.releaseSubtree(subtreePtr) 30 | } 31 | } 32 | } 33 | 34 | data class TSSubtree(override val language: TSLanguage<*>, 35 | val subtreePtr: Ptr, 36 | val lifetime: Resource?) : Node { 37 | override fun equals(other: Any?): Boolean = 38 | other is TSSubtree<*> && other.subtreePtr == this.subtreePtr 39 | 40 | override fun hashCode(): Int = 41 | subtreePtr.toInt() + 1 42 | 43 | override val type: T by lazy { 44 | this.language.getNodeType(SubtreeAccess.subtreeNodeType(this.subtreePtr)) as T 45 | } 46 | 47 | override val byteSize: Int 48 | get() = SubtreeAccess.subtreeBytesSize(this.subtreePtr) 49 | 50 | override val padding: Int 51 | get() = SubtreeAccess.subtreeBytesPadding(this.subtreePtr) 52 | 53 | override fun zipper(): Zipper = 54 | TSZipper( 55 | node = this, 56 | parent = null, 57 | structuralChildIndex = 0, 58 | parentAliasSequence = 0L, 59 | byteOffset = SubtreeAccess.subtreeBytesPadding(this.subtreePtr), 60 | childIndex = 0) 61 | } 62 | 63 | data class TSTree(val treePtr: Ptr, 64 | override val root: TSSubtree, 65 | override val actual: Boolean) : Tree { 66 | override fun adjust(edits: List): Tree { 67 | if (edits.isEmpty()) { 68 | return this 69 | } 70 | else { 71 | val treeCopy = JSitter.copyTree(this.treePtr) 72 | for (e in edits) { 73 | JSitter.editTree(treeCopy, e.startByte, e.oldEndByte, e.newEndByte) 74 | } 75 | return TSTree( 76 | treePtr = treeCopy, 77 | root = root.copy( 78 | subtreePtr = SubtreeAccess.root(treeCopy), 79 | lifetime = TSTreeResource(treeCopy)), 80 | actual = false) 81 | } 82 | } 83 | } 84 | 85 | 86 | -------------------------------------------------------------------------------- /src/main/kotlin/jsitter/impl/TSZipper.kt: -------------------------------------------------------------------------------- 1 | package jsitter.impl 2 | 3 | import jsitter.api.Node 4 | import jsitter.api.NodeType 5 | import jsitter.api.Zipper 6 | import jsitter.interop.JSitter 7 | 8 | private tailrec fun up(z: TSZipper<*>): TSZipper<*>? = 9 | when { 10 | z.parent == null -> null 11 | z.parent.visible() -> z.parent 12 | else -> up(z.parent) 13 | } 14 | 15 | private tailrec fun down(zip: TSZipper<*>) : TSZipper<*>? = 16 | if (SubtreeAccess.childCount(zip.node.subtreePtr) == 0) { 17 | null 18 | } else { 19 | val child = SubtreeAccess.childAt(zip.node.subtreePtr, 0) 20 | val lang = zip.node.language.languagePtr 21 | val productionId = SubtreeAccess.productionId(zip.node.subtreePtr) 22 | val aliasSequence = SubtreeAccess.aliasSequence(lang, productionId) 23 | val res = TSZipper( 24 | parent = zip, 25 | parentAliasSequence = aliasSequence, 26 | node = TSSubtree( 27 | subtreePtr = child, 28 | lifetime = zip.node.lifetime, 29 | language = zip.node.language), 30 | byteOffset = zip.byteOffset, 31 | childIndex = 0, 32 | structuralChildIndex = 0) 33 | if (res.visible()) { 34 | res 35 | } else { 36 | if (res.visibleChildCount() > 0) { 37 | down(res) 38 | } else { 39 | right(res) 40 | } 41 | } 42 | } 43 | 44 | private tailrec fun right(zip: TSZipper<*>): TSZipper<*>? = 45 | when { 46 | zip.parent == null -> 47 | null 48 | 49 | zip.childIndex == SubtreeAccess.childCount(zip.parent.node.subtreePtr) - 1 -> 50 | if (zip.parent.visible()) { 51 | null 52 | } else { 53 | right(zip.parent) 54 | } 55 | 56 | else -> { 57 | val sibling: Ptr = SubtreeAccess.childAt(zip.parent.node.subtreePtr, zip.childIndex + 1) 58 | val structuralChildIndex = 59 | if (!SubtreeAccess.extra(zip.node.subtreePtr)) { 60 | zip.structuralChildIndex + 1 61 | } else { 62 | zip.structuralChildIndex 63 | } 64 | val byteOffset = zip.byteOffset + SubtreeAccess.subtreeBytesSize(zip.node.subtreePtr) + SubtreeAccess.subtreeBytesPadding(sibling) 65 | val res = TSZipper( 66 | parent = zip.parent, 67 | parentAliasSequence = zip.parentAliasSequence, 68 | node = TSSubtree( 69 | subtreePtr = sibling, 70 | lifetime = zip.node.lifetime, 71 | language = zip.node.language), 72 | byteOffset = byteOffset, 73 | childIndex = zip.childIndex + 1, 74 | structuralChildIndex = structuralChildIndex) 75 | if (res.visible()) { 76 | res 77 | } else { 78 | down(res) ?: right(res) 79 | } 80 | } 81 | } 82 | 83 | private fun invisibleRight(zip: TSZipper<*>): TSZipper<*>? = 84 | when { 85 | zip.parent == null -> null 86 | zip.childIndex == SubtreeAccess.childCount(zip.parent.node.subtreePtr) - 1 -> null 87 | else -> { 88 | val sibling: Ptr = SubtreeAccess.childAt(zip.parent.node.subtreePtr, zip.childIndex + 1) 89 | val structuralChildIndex = 90 | if (!SubtreeAccess.extra(zip.node.subtreePtr)) { 91 | zip.structuralChildIndex + 1 92 | } else { 93 | zip.structuralChildIndex 94 | } 95 | val byteOffset = zip.byteOffset + SubtreeAccess.subtreeBytesSize(zip.node.subtreePtr) + SubtreeAccess.subtreeBytesPadding(sibling) 96 | TSZipper( 97 | parent = zip.parent, 98 | parentAliasSequence = zip.parentAliasSequence, 99 | node = TSSubtree( 100 | subtreePtr = sibling, 101 | lifetime = zip.node.lifetime, 102 | language = zip.node.language), 103 | byteOffset = byteOffset, 104 | childIndex = zip.childIndex + 1, 105 | structuralChildIndex = structuralChildIndex) 106 | } 107 | } 108 | 109 | private tailrec fun downRight(zip: TSZipper<*>): TSZipper<*>? { 110 | return if (SubtreeAccess.childCount(zip.node.subtreePtr) == 0) { 111 | null 112 | } else { 113 | val firstChild = SubtreeAccess.childAt(zip.node.subtreePtr, 0) 114 | val lang = zip.node.language.languagePtr 115 | val productionId = SubtreeAccess.productionId(zip.node.subtreePtr) 116 | val aliasSequence = SubtreeAccess.aliasSequence(lang, productionId) 117 | var res: TSZipper<*> = TSZipper( 118 | parent = zip, 119 | parentAliasSequence = aliasSequence, 120 | node = TSSubtree( 121 | subtreePtr = firstChild, 122 | lifetime = zip.node.lifetime, 123 | language = zip.node.language), 124 | byteOffset = zip.byteOffset, 125 | childIndex = 0, 126 | structuralChildIndex = 0) 127 | var r: TSZipper<*>? = res 128 | while (r != null) { 129 | r = invisibleRight(r) 130 | if (r != null) { 131 | if (r.visible() || r.visibleChildCount() > 0) { 132 | res = r 133 | } 134 | } 135 | } 136 | 137 | if (res.visible()) { 138 | res 139 | } else { 140 | if (res.visibleChildCount() > 0) { 141 | downRight(res) 142 | } else { 143 | null 144 | } 145 | } 146 | } 147 | } 148 | 149 | private tailrec fun left(z: TSZipper<*>): TSZipper<*>? { 150 | val parent = z.parent 151 | return when { 152 | parent == null -> { 153 | null 154 | } 155 | z.childIndex == 0 -> { 156 | if (parent.visible()) { 157 | null 158 | } else { 159 | left(parent) 160 | } 161 | } 162 | else -> { 163 | val sibling: Ptr = SubtreeAccess.childAt(parent.node.subtreePtr, z.childIndex - 1) 164 | val structuralChildIndex = 165 | if (!SubtreeAccess.extra(z.node.subtreePtr)) { 166 | z.structuralChildIndex - 1 167 | } else { 168 | z.structuralChildIndex 169 | } 170 | val byteOffset = z.byteOffset - SubtreeAccess.subtreeBytesPadding(z.node.subtreePtr) - SubtreeAccess.subtreeBytesSize(sibling) 171 | val res = TSZipper( 172 | parent = parent, 173 | parentAliasSequence = z.parentAliasSequence, 174 | node = TSSubtree( 175 | subtreePtr = sibling, 176 | language = z.node.language, 177 | lifetime = z.node.lifetime), 178 | byteOffset = byteOffset, 179 | childIndex = z.childIndex - 1, 180 | structuralChildIndex = structuralChildIndex) 181 | if (res.visible()) { 182 | res 183 | } else { 184 | downRight(res) ?: left(res) 185 | } 186 | } 187 | } 188 | } 189 | 190 | class TSZipper(val parent: TSZipper<*>?, 191 | val parentAliasSequence: Ptr, 192 | override val node: TSSubtree, 193 | override val byteOffset: Int, 194 | val childIndex: Int, 195 | val structuralChildIndex: Int) : Zipper { 196 | override val alias: NodeType? 197 | get() { 198 | val alias = this.aliasSymbol() 199 | return if (alias != 0) { 200 | this.node.language.getNodeType(alias) 201 | } 202 | else { 203 | null 204 | } 205 | } 206 | 207 | fun aliasSymbol(): TSSymbol = 208 | if (this.parentAliasSequence != 0L && !SubtreeAccess.extra(this.node.subtreePtr)) { 209 | SubtreeAccess.aliasSequenceAt(this.parentAliasSequence, this.structuralChildIndex) 210 | } 211 | else { 212 | 0 213 | } 214 | 215 | fun visible(): Boolean = 216 | SubtreeAccess.isVisible(this.node.subtreePtr) || this.aliasSymbol() != 0 217 | 218 | override fun up(): Zipper<*>? = up(this) 219 | 220 | override fun down(): Zipper<*>? = down(this) 221 | 222 | fun visibleChildCount(): Int = SubtreeAccess.visibleChildCount(this.node.subtreePtr) 223 | 224 | override fun downRight(): Zipper<*>? = downRight(this) 225 | 226 | override fun left(): Zipper<*>? = left(this) 227 | 228 | override fun right(): Zipper<*>? = right(this) 229 | 230 | override fun retainSubtree(): Node { 231 | JSitter.retainSubtree(this.node.subtreePtr) 232 | return node.copy(lifetime = TSSubtreeResource(this.node.subtreePtr)) 233 | } 234 | } -------------------------------------------------------------------------------- /src/test/kotlin/jsitter/test/tests.kt: -------------------------------------------------------------------------------- 1 | package jsitter.test 2 | 3 | import jsitter.api.* 4 | import junit.framework.Assert.assertEquals 5 | import org.junit.Test 6 | import java.nio.ByteBuffer 7 | import java.nio.file.Files 8 | import java.nio.file.Paths 9 | 10 | object SourceFile : NodeType("source_file") 11 | 12 | fun golang(): Language { 13 | val lang = Language.load(SourceFile, "go", "tree_sitter_go", "libtsgo.dylib", Language::class.java.classLoader) 14 | lang.register(SourceFile) 15 | return lang 16 | } 17 | 18 | class Test1 { 19 | 20 | @Test 21 | fun visiting2() { 22 | val lang = golang() 23 | val parser = lang.parser() 24 | val tree = parser.parse(StringText("type Y = struct {x []string `yyy`}")) 25 | var zipper: Zipper<*>? = tree.root.zipper() 26 | val str = arrayListOf() 27 | while (zipper != null) { 28 | val nodeTypeStr = zipper.node.type.toString() 29 | val aliasStr = zipper.alias?.toString() 30 | str.add(if (aliasStr != null) "$nodeTypeStr($aliasStr)" else nodeTypeStr) 31 | zipper = zipper.next() 32 | } 33 | assertEquals(listOf( 34 | "source_file", 35 | "type_declaration", 36 | "type", 37 | "type_alias", 38 | "identifier(type_identifier)", 39 | "=", 40 | "struct_type", 41 | "struct", 42 | "field_declaration_list", 43 | "{", 44 | "field_declaration", 45 | "identifier(field_identifier)", 46 | "slice_type", 47 | "[", 48 | "]", 49 | "identifier(type_identifier)", 50 | "raw_string_literal", 51 | "}" 52 | ), str) 53 | } 54 | 55 | @Test 56 | fun visitingTree() { 57 | val lang = golang() 58 | val parser = lang.parser() 59 | var tree = parser.parse(StringText("func hello() { sayHello() }")) 60 | var zipper: Zipper<*>? = tree.root.zipper() 61 | val str = arrayListOf() 62 | while (zipper != null) { 63 | str.add(zipper.node.type.toString()) 64 | zipper = zipper.next() 65 | } 66 | assertEquals(listOf("source_file", 67 | "function_declaration", 68 | "func", 69 | "identifier", 70 | "parameter_list", 71 | "(", 72 | ")", 73 | "block", 74 | "{", 75 | "call_expression", 76 | "identifier", 77 | "argument_list", 78 | "(", 79 | ")", 80 | "}"), str) 81 | val codeBlock = tree.root.zipper() 82 | .down()!! 83 | .down()!! 84 | .right()!! 85 | .right()!! 86 | .right()!! 87 | .down()!! 88 | .right()!! 89 | assertEquals("call_expression", codeBlock.node.type.name) 90 | tree = tree.adjust(listOf(Edit(5*2, (5 + 5)*2, (5 + 3) * 2))) 91 | tree = parser.parse(StringText("func bye() { sayHello() }"), adjustedTree = tree) 92 | var z: Zipper<*>? = tree.root.zipper() 93 | val str2 = arrayListOf() 94 | while (z != null) { 95 | str2 += z.node.type.toString() 96 | z = z.next() 97 | } 98 | assertEquals(listOf("source_file", 99 | "function_declaration", 100 | "func", 101 | "identifier", 102 | "parameter_list", 103 | "(", 104 | ")", 105 | "block", 106 | "{", 107 | "call_expression", 108 | "identifier", 109 | "argument_list", 110 | "(", 111 | ")", 112 | "}"), str2) 113 | tree = tree.adjust(listOf(Edit(8 * 2, 8 * 2, 13 * 2), Edit(17 * 2, (17 + 11) * 2, 17 * 2))) 114 | tree = parser.parse(StringText("func byeWorld() { }"), adjustedTree = tree) 115 | z = tree.root.zipper() 116 | val str3 = arrayListOf() 117 | while (z != null) { 118 | str3 += z.node.type.toString() 119 | z = z.next() 120 | } 121 | assertEquals(listOf("source_file", 122 | "function_declaration", 123 | "func", 124 | "identifier", 125 | "parameter_list", 126 | "(", 127 | ")", 128 | "block", 129 | "{", 130 | "}"), str3) 131 | } 132 | 133 | @Test 134 | fun perf() { 135 | val bytes = Files.readAllBytes(Paths.get("testData/router_go")) 136 | val text = object : Text { 137 | override fun read(byteOffset: Int, output: ByteBuffer) { 138 | output.put(bytes, byteOffset, Math.min(bytes.size - byteOffset, output.limit())) 139 | } 140 | 141 | override val encoding: Encoding = Encoding.UTF8 142 | } 143 | val lang = golang() 144 | val parser = lang.parser() 145 | val start1 = System.nanoTime() 146 | val tree = parser.parse(text) 147 | val end1 = System.nanoTime() 148 | println("parse time = ${end1 - start1}") 149 | var zipper: Zipper<*>? = tree.root.zipper() 150 | var nodesCount = 0 151 | val start2 = System.nanoTime() 152 | while (zipper != null) { 153 | zipper = zipper.next() 154 | nodesCount++ 155 | } 156 | val end2 = System.nanoTime() 157 | println("walk1 time = ${end2 - start2}") 158 | println("nodesCount = ${nodesCount}") 159 | val start = System.nanoTime() 160 | zipper = tree.root.zipper() 161 | while (zipper != null) { 162 | zipper = zipper.next() 163 | } 164 | val end = System.nanoTime() 165 | println("walk2 time = ${end - start}") 166 | } 167 | } 168 | 169 | --------------------------------------------------------------------------------