├── Emigo.png ├── requirements.txt ├── queries ├── tree-sitter-languages │ ├── elisp-tags.scm │ ├── python-tags.scm │ ├── c-tags.scm │ ├── java-tags.scm │ ├── kotlin-tags.scm │ ├── ql-tags.scm │ ├── php-tags.scm │ ├── cpp-tags.scm │ ├── go-tags.scm │ ├── elm-tags.scm │ ├── c_sharp-tags.scm │ ├── typescript-tags.scm │ ├── ruby-tags.scm │ ├── rust-tags.scm │ ├── elixir-tags.scm │ ├── hcl-tags.scm │ ├── README.md │ ├── javascript-tags.scm │ ├── dart-tags.scm │ └── ocaml-tags.scm └── tree-sitter-language-pack │ └── javascript-tags.scm ├── .github └── workflows │ └── doctor-pr.yml ├── todo.md ├── .gitignore ├── REPLIT_SETUP_COMPLETE.md ├── llm_providers.py ├── replit.md ├── test_setup.py ├── README.md ├── README_REPLIT.md ├── config.py ├── agent.py ├── utils.py ├── LICENSE ├── tool_definitions.py ├── llm.py ├── session.py ├── system_prompt.py ├── tools.py └── emigo-epc.el /Emigo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthewZMD/emigo/HEAD/Emigo.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | epc 2 | networkx 3 | pygments 4 | grep-ast 5 | diskcache 6 | tiktoken 7 | tqdm 8 | gitignore_parser 9 | scipy 10 | litellm 11 | orjson 12 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/elisp-tags.scm: -------------------------------------------------------------------------------- 1 | ;; defun/defsubst 2 | (function_definition name: (symbol) @name.definition.function) @definition.function 3 | 4 | ;; Treat macros as function definitions for the sake of TAGS. 5 | (macro_definition name: (symbol) @name.definition.function) @definition.function 6 | 7 | ;; Match function calls 8 | (list (symbol) @name.reference.function) @reference.function 9 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/python-tags.scm: -------------------------------------------------------------------------------- 1 | (class_definition 2 | name: (identifier) @name.definition.class) @definition.class 3 | 4 | (function_definition 5 | name: (identifier) @name.definition.function) @definition.function 6 | 7 | (call 8 | function: [ 9 | (identifier) @name.reference.call 10 | (attribute 11 | attribute: (identifier) @name.reference.call) 12 | ]) @reference.call 13 | -------------------------------------------------------------------------------- /.github/workflows/doctor-pr.yml: -------------------------------------------------------------------------------- 1 | name: Doctor PR 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | action_input: 6 | required: true 7 | type: string 8 | jobs: 9 | doctor-pr: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Doctor PR 13 | uses: Doctor-PR/action@latest 14 | with: 15 | action_input: ${{inputs.action_input}} 16 | anthropic_api_key: ${{secrets.ANTHROPIC_API_KEY}} 17 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/c-tags.scm: -------------------------------------------------------------------------------- 1 | (struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class 2 | 3 | (declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class 4 | 5 | (function_declarator declarator: (identifier) @name.definition.function) @definition.function 6 | 7 | (type_definition declarator: (type_identifier) @name.definition.type) @definition.type 8 | 9 | (enum_specifier name: (type_identifier) @name.definition.type) @definition.type 10 | -------------------------------------------------------------------------------- /todo.md: -------------------------------------------------------------------------------- 1 | 1. 是否要对 AI 输出的代码也做语法高亮? 我感觉不需要吧, diff 高亮就可以了, 如果没有输出 diff, 基本上也没啥用 2 | 2. 怎么根据 AI 输出生成 diff files 列表? Aidermacs 代码搬运过来? 每个项目都要按照文件粒度缓存补丁 3 | 3. diff review 的界面: 左边铺满, 左边上面分别是 "全部文件、文件 A、文件 B", 左边下面是 "全部文件的 hunks, 文件 A 的 hunks, 文件 B 的 hunks", 支持整个文件 apply/cancel 和 hunk 的 apply/cancel 4 | 4. 右侧栏应该显示所有 session 的状态,方便用户知道 AI 干完活以后,手动切换 session 5 | 5. 研究 Cursor 的提示词, 看看能否用 RAG 的方式来增强 aider tree-sitter 这种 repomap 的方式? 我总感觉 Cursor 的那种模式要高级一点, aider 适合自己的项目精确重构, Cursor 适应范围要广很多 6 | 6. 可以随时更改过去的某个 prompt,然后重新发给 LLM, 执行这个命令的时候, 建议临时取消 read-only 后, 编辑后重新发送 7 | 7. 动态切换 AI Model 8 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/java-tags.scm: -------------------------------------------------------------------------------- 1 | (class_declaration 2 | name: (identifier) @name.definition.class) @definition.class 3 | 4 | (method_declaration 5 | name: (identifier) @name.definition.method) @definition.method 6 | 7 | (method_invocation 8 | name: (identifier) @name.reference.call 9 | arguments: (argument_list) @reference.call) 10 | 11 | (interface_declaration 12 | name: (identifier) @name.definition.interface) @definition.interface 13 | 14 | (type_list 15 | (type_identifier) @name.reference.implementation) @reference.implementation 16 | 17 | (object_creation_expression 18 | type: (type_identifier) @name.reference.class) @reference.class 19 | 20 | (superclass (type_identifier) @name.reference.class) @reference.class 21 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/kotlin-tags.scm: -------------------------------------------------------------------------------- 1 | ; Definitions 2 | 3 | (class_declaration 4 | (type_identifier) @name.definition.class) @definition.class 5 | 6 | (function_declaration 7 | (simple_identifier) @name.definition.function) @definition.function 8 | 9 | (object_declaration 10 | (type_identifier) @name.definition.object) @definition.object 11 | 12 | ; References 13 | 14 | (call_expression 15 | [ 16 | (simple_identifier) @name.reference.call 17 | (navigation_expression 18 | (navigation_suffix 19 | (simple_identifier) @name.reference.call)) 20 | ]) @reference.call 21 | 22 | (delegation_specifier 23 | [ 24 | (user_type) @name.reference.type 25 | (constructor_invocation 26 | (user_type) @name.reference.type) 27 | ]) @reference.type 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | pip-wheel-metadata/ 20 | share/python-wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | MANIFEST 25 | 26 | # Virtual environments 27 | .env 28 | .venv 29 | env/ 30 | venv/ 31 | ENV/ 32 | env.bak/ 33 | venv.bak/ 34 | 35 | # IDE 36 | .vscode/ 37 | .idea/ 38 | *.swp 39 | *.swo 40 | *~ 41 | 42 | # Emacs 43 | \#*\# 44 | .\#* 45 | *.elc 46 | auto-save-list 47 | tramp 48 | .\#* 49 | 50 | # Cache 51 | .cache/ 52 | .pythonlibs/ 53 | *.cache 54 | 55 | # Emigo specific 56 | .emigo_repomap/ 57 | 58 | # Replit 59 | .replit 60 | .upm/ 61 | .config/ 62 | replit.nix 63 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/ql-tags.scm: -------------------------------------------------------------------------------- 1 | (classlessPredicate 2 | name: (predicateName) @name.definition.function) @definition.function 3 | 4 | (memberPredicate 5 | name: (predicateName) @name.definition.method) @definition.method 6 | 7 | (aritylessPredicateExpr 8 | name: (literalId) @name.reference.call) @reference.call 9 | 10 | (module 11 | name: (moduleName) @name.definition.module) @definition.module 12 | 13 | (dataclass 14 | name: (className) @name.definition.class) @definition.class 15 | 16 | (datatype 17 | name: (className) @name.definition.class) @definition.class 18 | 19 | (datatypeBranch 20 | name: (className) @name.definition.class) @definition.class 21 | 22 | (qualifiedRhs 23 | name: (predicateName) @name.reference.call) @reference.call 24 | 25 | (typeExpr 26 | name: (className) @name.reference.type) @reference.type 27 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/php-tags.scm: -------------------------------------------------------------------------------- 1 | (class_declaration 2 | name: (name) @name.definition.class) @definition.class 3 | 4 | (function_definition 5 | name: (name) @name.definition.function) @definition.function 6 | 7 | (method_declaration 8 | name: (name) @name.definition.function) @definition.function 9 | 10 | (object_creation_expression 11 | [ 12 | (qualified_name (name) @name.reference.class) 13 | (variable_name (name) @name.reference.class) 14 | ]) @reference.class 15 | 16 | (function_call_expression 17 | function: [ 18 | (qualified_name (name) @name.reference.call) 19 | (variable_name (name)) @name.reference.call 20 | ]) @reference.call 21 | 22 | (scoped_call_expression 23 | name: (name) @name.reference.call) @reference.call 24 | 25 | (member_call_expression 26 | name: (name) @name.reference.call) @reference.call 27 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/cpp-tags.scm: -------------------------------------------------------------------------------- 1 | (struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class 2 | 3 | (declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class 4 | 5 | (function_declarator declarator: (identifier) @name.definition.function) @definition.function 6 | 7 | (function_declarator declarator: (field_identifier) @name.definition.function) @definition.function 8 | 9 | (function_declarator declarator: (qualified_identifier scope: (namespace_identifier) @scope name: (identifier) @name.definition.method)) @definition.method 10 | 11 | (type_definition declarator: (type_identifier) @name.definition.type) @definition.type 12 | 13 | (enum_specifier name: (type_identifier) @name.definition.type) @definition.type 14 | 15 | (class_specifier name: (type_identifier) @name.definition.class) @definition.class 16 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/go-tags.scm: -------------------------------------------------------------------------------- 1 | ( 2 | (comment)* @doc 3 | . 4 | (function_declaration 5 | name: (identifier) @name.definition.function) @definition.function 6 | (#strip! @doc "^//\\s*") 7 | (#set-adjacent! @doc @definition.function) 8 | ) 9 | 10 | ( 11 | (comment)* @doc 12 | . 13 | (method_declaration 14 | name: (field_identifier) @name.definition.method) @definition.method 15 | (#strip! @doc "^//\\s*") 16 | (#set-adjacent! @doc @definition.method) 17 | ) 18 | 19 | (call_expression 20 | function: [ 21 | (identifier) @name.reference.call 22 | (parenthesized_expression (identifier) @name.reference.call) 23 | (selector_expression field: (field_identifier) @name.reference.call) 24 | (parenthesized_expression (selector_expression field: (field_identifier) @name.reference.call)) 25 | ]) @reference.call 26 | 27 | (type_spec 28 | name: (type_identifier) @name.definition.type) @definition.type 29 | 30 | (type_identifier) @name.reference.type @reference.type 31 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/elm-tags.scm: -------------------------------------------------------------------------------- 1 | (value_declaration (function_declaration_left (lower_case_identifier) @name.definition.function)) @definition.function 2 | 3 | (function_call_expr (value_expr (value_qid) @name.reference.function)) @reference.function 4 | (exposed_value (lower_case_identifier) @name.reference.function) @reference.function 5 | (type_annotation ((lower_case_identifier) @name.reference.function) (colon)) @reference.function 6 | 7 | (type_declaration ((upper_case_identifier) @name.definition.type)) @definition.type 8 | 9 | (type_ref (upper_case_qid (upper_case_identifier) @name.reference.type)) @reference.type 10 | (exposed_type (upper_case_identifier) @name.reference.type) @reference.type 11 | 12 | (type_declaration (union_variant (upper_case_identifier) @name.definition.union)) @definition.union 13 | 14 | (value_expr (upper_case_qid (upper_case_identifier) @name.reference.union)) @reference.union 15 | 16 | 17 | (module_declaration 18 | (upper_case_qid (upper_case_identifier)) @name.definition.module 19 | ) @definition.module 20 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/c_sharp-tags.scm: -------------------------------------------------------------------------------- 1 | (class_declaration 2 | name: (identifier) @name.definition.class 3 | ) @definition.class 4 | 5 | (class_declaration 6 | bases: (base_list (_) @name.reference.class) 7 | ) @reference.class 8 | 9 | (interface_declaration 10 | name: (identifier) @name.definition.interface 11 | ) @definition.interface 12 | 13 | (interface_declaration 14 | bases: (base_list (_) @name.reference.interface) 15 | ) @reference.interface 16 | 17 | (method_declaration 18 | name: (identifier) @name.definition.method 19 | ) @definition.method 20 | 21 | (object_creation_expression 22 | type: (identifier) @name.reference.class 23 | ) @reference.class 24 | 25 | (type_parameter_constraints_clause 26 | target: (identifier) @name.reference.class 27 | ) @reference.class 28 | 29 | (type_constraint 30 | type: (identifier) @name.reference.class 31 | ) @reference.class 32 | 33 | (variable_declaration 34 | type: (identifier) @name.reference.class 35 | ) @reference.class 36 | 37 | (invocation_expression 38 | function: 39 | (member_access_expression 40 | name: (identifier) @name.reference.send 41 | ) 42 | ) @reference.send 43 | 44 | (namespace_declaration 45 | name: (identifier) @name.definition.module 46 | ) @definition.module 47 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/typescript-tags.scm: -------------------------------------------------------------------------------- 1 | (function_signature 2 | name: (identifier) @name.definition.function) @definition.function 3 | 4 | (method_signature 5 | name: (property_identifier) @name.definition.method) @definition.method 6 | 7 | (abstract_method_signature 8 | name: (property_identifier) @name.definition.method) @definition.method 9 | 10 | (abstract_class_declaration 11 | name: (type_identifier) @name.definition.class) @definition.class 12 | 13 | (module 14 | name: (identifier) @name.definition.module) @definition.module 15 | 16 | (interface_declaration 17 | name: (type_identifier) @name.definition.interface) @definition.interface 18 | 19 | (type_annotation 20 | (type_identifier) @name.reference.type) @reference.type 21 | 22 | (new_expression 23 | constructor: (identifier) @name.reference.class) @reference.class 24 | 25 | (function_declaration 26 | name: (identifier) @name.definition.function) @definition.function 27 | 28 | (method_definition 29 | name: (property_identifier) @name.definition.method) @definition.method 30 | 31 | (class_declaration 32 | name: (type_identifier) @name.definition.class) @definition.class 33 | 34 | (interface_declaration 35 | name: (type_identifier) @name.definition.class) @definition.class 36 | 37 | (type_alias_declaration 38 | name: (type_identifier) @name.definition.type) @definition.type 39 | 40 | (enum_declaration 41 | name: (identifier) @name.definition.enum) @definition.enum 42 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/ruby-tags.scm: -------------------------------------------------------------------------------- 1 | ; Method definitions 2 | 3 | ( 4 | (comment)* @doc 5 | . 6 | [ 7 | (method 8 | name: (_) @name.definition.method) @definition.method 9 | (singleton_method 10 | name: (_) @name.definition.method) @definition.method 11 | ] 12 | (#strip! @doc "^#\\s*") 13 | (#select-adjacent! @doc @definition.method) 14 | ) 15 | 16 | (alias 17 | name: (_) @name.definition.method) @definition.method 18 | 19 | (setter 20 | (identifier) @ignore) 21 | 22 | ; Class definitions 23 | 24 | ( 25 | (comment)* @doc 26 | . 27 | [ 28 | (class 29 | name: [ 30 | (constant) @name.definition.class 31 | (scope_resolution 32 | name: (_) @name.definition.class) 33 | ]) @definition.class 34 | (singleton_class 35 | value: [ 36 | (constant) @name.definition.class 37 | (scope_resolution 38 | name: (_) @name.definition.class) 39 | ]) @definition.class 40 | ] 41 | (#strip! @doc "^#\\s*") 42 | (#select-adjacent! @doc @definition.class) 43 | ) 44 | 45 | ; Module definitions 46 | 47 | ( 48 | (module 49 | name: [ 50 | (constant) @name.definition.module 51 | (scope_resolution 52 | name: (_) @name.definition.module) 53 | ]) @definition.module 54 | ) 55 | 56 | ; Calls 57 | 58 | (call method: (identifier) @name.reference.call) @reference.call 59 | 60 | ( 61 | [(identifier) (constant)] @name.reference.call @reference.call 62 | (#is-not? local) 63 | (#not-match? @name.reference.call "^(lambda|load|require|require_relative|__FILE__|__LINE__)$") 64 | ) 65 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/rust-tags.scm: -------------------------------------------------------------------------------- 1 | ; ADT definitions 2 | 3 | (struct_item 4 | name: (type_identifier) @name.definition.class) @definition.class 5 | 6 | (enum_item 7 | name: (type_identifier) @name.definition.class) @definition.class 8 | 9 | (union_item 10 | name: (type_identifier) @name.definition.class) @definition.class 11 | 12 | ; type aliases 13 | 14 | (type_item 15 | name: (type_identifier) @name.definition.class) @definition.class 16 | 17 | ; method definitions 18 | 19 | (declaration_list 20 | (function_item 21 | name: (identifier) @name.definition.method)) @definition.method 22 | 23 | ; function definitions 24 | 25 | (function_item 26 | name: (identifier) @name.definition.function) @definition.function 27 | 28 | ; trait definitions 29 | (trait_item 30 | name: (type_identifier) @name.definition.interface) @definition.interface 31 | 32 | ; module definitions 33 | (mod_item 34 | name: (identifier) @name.definition.module) @definition.module 35 | 36 | ; macro definitions 37 | 38 | (macro_definition 39 | name: (identifier) @name.definition.macro) @definition.macro 40 | 41 | ; references 42 | 43 | (call_expression 44 | function: (identifier) @name.reference.call) @reference.call 45 | 46 | (call_expression 47 | function: (field_expression 48 | field: (field_identifier) @name.reference.call)) @reference.call 49 | 50 | (macro_invocation 51 | macro: (identifier) @name.reference.call) @reference.call 52 | 53 | ; implementations 54 | 55 | (impl_item 56 | trait: (type_identifier) @name.reference.implementation) @reference.implementation 57 | 58 | (impl_item 59 | type: (type_identifier) @name.reference.implementation 60 | !trait) @reference.implementation 61 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/elixir-tags.scm: -------------------------------------------------------------------------------- 1 | ; Definitions 2 | 3 | ; * modules and protocols 4 | (call 5 | target: (identifier) @ignore 6 | (arguments (alias) @name.definition.module) 7 | (#match? @ignore "^(defmodule|defprotocol)$")) @definition.module 8 | 9 | ; * functions/macros 10 | (call 11 | target: (identifier) @ignore 12 | (arguments 13 | [ 14 | ; zero-arity functions with no parentheses 15 | (identifier) @name.definition.function 16 | ; regular function clause 17 | (call target: (identifier) @name.definition.function) 18 | ; function clause with a guard clause 19 | (binary_operator 20 | left: (call target: (identifier) @name.definition.function) 21 | operator: "when") 22 | ]) 23 | (#match? @ignore "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @definition.function 24 | 25 | ; References 26 | 27 | ; ignore calls to kernel/special-forms keywords 28 | (call 29 | target: (identifier) @ignore 30 | (#match? @ignore "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp|defmodule|defprotocol|defimpl|defstruct|defexception|defoverridable|alias|case|cond|else|for|if|import|quote|raise|receive|require|reraise|super|throw|try|unless|unquote|unquote_splicing|use|with)$")) 31 | 32 | ; ignore module attributes 33 | (unary_operator 34 | operator: "@" 35 | operand: (call 36 | target: (identifier) @ignore)) 37 | 38 | ; * function call 39 | (call 40 | target: [ 41 | ; local 42 | (identifier) @name.reference.call 43 | ; remote 44 | (dot 45 | right: (identifier) @name.reference.call) 46 | ]) @reference.call 47 | 48 | ; * pipe into function call 49 | (binary_operator 50 | operator: "|>" 51 | right: (identifier) @name.reference.call) @reference.call 52 | 53 | ; * modules 54 | (alias) @name.reference.module @reference.module 55 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/hcl-tags.scm: -------------------------------------------------------------------------------- 1 | ;; Based on https://github.com/tree-sitter-grammars/tree-sitter-hcl/blob/main/make_grammar.js 2 | ;; Which has Apache 2.0 License 3 | ;; tags.scm for Terraform (tree-sitter-hcl) 4 | 5 | ; === Definitions: Terraform Blocks === 6 | (block 7 | (identifier) @block_type 8 | (string_lit (template_literal) @resource_type) 9 | (string_lit (template_literal) @name.definition.resource) 10 | (body) @definition.resource 11 | ) (#eq? @block_type "resource") 12 | 13 | (block 14 | (identifier) @block_type 15 | (string_lit (template_literal) @name.definition.module) 16 | (body) @definition.module 17 | ) (#eq? @block_type "module") 18 | 19 | (block 20 | (identifier) @block_type 21 | (string_lit (template_literal) @name.definition.variable) 22 | (body) @definition.variable 23 | ) (#eq? @block_type "variable") 24 | 25 | (block 26 | (identifier) @block_type 27 | (string_lit (template_literal) @name.definition.output) 28 | (body) @definition.output 29 | ) (#eq? @block_type "output") 30 | 31 | (block 32 | (identifier) @block_type 33 | (string_lit (template_literal) @name.definition.provider) 34 | (body) @definition.provider 35 | ) (#eq? @block_type "provider") 36 | 37 | (block 38 | (identifier) @block_type 39 | (body 40 | (attribute 41 | (identifier) @name.definition.local 42 | (expression) @definition.local 43 | )+ 44 | ) 45 | ) (#eq? @block_type "locals") 46 | 47 | ; === References: Variables, Locals, Modules, Data, Resources === 48 | ((variable_expr) @ref_type 49 | (get_attr (identifier) @name.reference.variable) 50 | ) @reference.variable 51 | (#eq? @ref_type "var") 52 | 53 | ((variable_expr) @ref_type 54 | (get_attr (identifier) @name.reference.local) 55 | ) @reference.local 56 | (#eq? @ref_type "local") 57 | 58 | ((variable_expr) @ref_type 59 | (get_attr (identifier) @name.reference.module) 60 | ) @reference.module 61 | (#eq? @ref_type "module") 62 | 63 | ((variable_expr) @ref_type 64 | (get_attr (identifier) @data_source_type) 65 | (get_attr (identifier) @name.reference.data) 66 | ) @reference.data 67 | (#eq? @ref_type "data") 68 | 69 | ((variable_expr) @resource_type 70 | (get_attr (identifier) @name.reference.resource) 71 | ) @reference.resource 72 | (#not-eq? @resource_type "var") 73 | (#not-eq? @resource_type "local") 74 | (#not-eq? @resource_type "module") 75 | (#not-eq? @resource_type "data") 76 | (#not-eq? @resource_type "provider") 77 | (#not-eq? @resource_type "output") 78 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Credits 3 | 4 | Aider uses modified versions of the tags.scm files from these open source 5 | tree-sitter language implementations: 6 | 7 | * [https://github.com/tree-sitter/tree-sitter-c](https://github.com/tree-sitter/tree-sitter-c) — licensed under the MIT License. 8 | * [https://github.com/tree-sitter/tree-sitter-c-sharp](https://github.com/tree-sitter/tree-sitter-c-sharp) — licensed under the MIT License. 9 | * [https://github.com/tree-sitter/tree-sitter-cpp](https://github.com/tree-sitter/tree-sitter-cpp) — licensed under the MIT License. 10 | * [https://github.com/Wilfred/tree-sitter-elisp](https://github.com/Wilfred/tree-sitter-elisp) — licensed under the MIT License. 11 | * [https://github.com/elixir-lang/tree-sitter-elixir](https://github.com/elixir-lang/tree-sitter-elixir) — licensed under the Apache License, Version 2.0. 12 | * [https://github.com/elm-tooling/tree-sitter-elm](https://github.com/elm-tooling/tree-sitter-elm) — licensed under the MIT License. 13 | * [https://github.com/tree-sitter/tree-sitter-go](https://github.com/tree-sitter/tree-sitter-go) — licensed under the MIT License. 14 | * [https://github.com/tree-sitter/tree-sitter-java](https://github.com/tree-sitter/tree-sitter-java) — licensed under the MIT License. 15 | * [https://github.com/tree-sitter/tree-sitter-javascript](https://github.com/tree-sitter/tree-sitter-javascript) — licensed under the MIT License. 16 | * [https://github.com/tree-sitter/tree-sitter-ocaml](https://github.com/tree-sitter/tree-sitter-ocaml) — licensed under the MIT License. 17 | * [https://github.com/tree-sitter/tree-sitter-php](https://github.com/tree-sitter/tree-sitter-php) — licensed under the MIT License. 18 | * [https://github.com/tree-sitter/tree-sitter-python](https://github.com/tree-sitter/tree-sitter-python) — licensed under the MIT License. 19 | * [https://github.com/tree-sitter/tree-sitter-ql](https://github.com/tree-sitter/tree-sitter-ql) — licensed under the MIT License. 20 | * [https://github.com/r-lib/tree-sitter-r](https://github.com/r-lib/tree-sitter-r) — licensed under the MIT License. 21 | * [https://github.com/tree-sitter/tree-sitter-ruby](https://github.com/tree-sitter/tree-sitter-ruby) — licensed under the MIT License. 22 | * [https://github.com/tree-sitter/tree-sitter-rust](https://github.com/tree-sitter/tree-sitter-rust) — licensed under the MIT License. 23 | * [https://github.com/tree-sitter/tree-sitter-typescript](https://github.com/tree-sitter/tree-sitter-typescript) — licensed under the MIT License. 24 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/javascript-tags.scm: -------------------------------------------------------------------------------- 1 | ( 2 | (comment)* @doc 3 | . 4 | (method_definition 5 | name: (property_identifier) @name.definition.method) @definition.method 6 | (#not-eq? @name.definition.method "constructor") 7 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") 8 | (#select-adjacent! @doc @definition.method) 9 | ) 10 | 11 | ( 12 | (comment)* @doc 13 | . 14 | [ 15 | (class 16 | name: (_) @name.definition.class) 17 | (class_declaration 18 | name: (_) @name.definition.class) 19 | ] @definition.class 20 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") 21 | (#select-adjacent! @doc @definition.class) 22 | ) 23 | 24 | ( 25 | (comment)* @doc 26 | . 27 | [ 28 | (function 29 | name: (identifier) @name.definition.function) 30 | (function_declaration 31 | name: (identifier) @name.definition.function) 32 | (generator_function 33 | name: (identifier) @name.definition.function) 34 | (generator_function_declaration 35 | name: (identifier) @name.definition.function) 36 | ] @definition.function 37 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") 38 | (#select-adjacent! @doc @definition.function) 39 | ) 40 | 41 | ( 42 | (comment)* @doc 43 | . 44 | (lexical_declaration 45 | (variable_declarator 46 | name: (identifier) @name.definition.function 47 | value: [(arrow_function) (function)]) @definition.function) 48 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") 49 | (#select-adjacent! @doc @definition.function) 50 | ) 51 | 52 | ( 53 | (comment)* @doc 54 | . 55 | (variable_declaration 56 | (variable_declarator 57 | name: (identifier) @name.definition.function 58 | value: [(arrow_function) (function)]) @definition.function) 59 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") 60 | (#select-adjacent! @doc @definition.function) 61 | ) 62 | 63 | (assignment_expression 64 | left: [ 65 | (identifier) @name.definition.function 66 | (member_expression 67 | property: (property_identifier) @name.definition.function) 68 | ] 69 | right: [(arrow_function) (function)] 70 | ) @definition.function 71 | 72 | (pair 73 | key: (property_identifier) @name.definition.function 74 | value: [(arrow_function) (function)]) @definition.function 75 | 76 | ( 77 | (call_expression 78 | function: (identifier) @name.reference.call) @reference.call 79 | (#not-match? @name.reference.call "^(require)$") 80 | ) 81 | 82 | (call_expression 83 | function: (member_expression 84 | property: (property_identifier) @name.reference.call) 85 | arguments: (_) @reference.call) 86 | 87 | (new_expression 88 | constructor: (_) @name.reference.class) @reference.class 89 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/dart-tags.scm: -------------------------------------------------------------------------------- 1 | (class_definition 2 | name: (identifier) @name.definition.class) @definition.class 3 | 4 | (method_signature 5 | (function_signature)) @definition.method 6 | 7 | (type_alias 8 | (type_identifier) @name.definition.type) @definition.type 9 | 10 | (method_signature 11 | (getter_signature 12 | name: (identifier) @name.definition.method)) @definition.method 13 | 14 | (method_signature 15 | (setter_signature 16 | name: (identifier) @name.definition.method)) @definition.method 17 | 18 | (method_signature 19 | (function_signature 20 | name: (identifier) @name.definition.method)) @definition.method 21 | 22 | (method_signature 23 | (factory_constructor_signature 24 | (identifier) @name.definition.method)) @definition.method 25 | 26 | (method_signature 27 | (constructor_signature 28 | name: (identifier) @name.definition.method)) @definition.method 29 | 30 | (method_signature 31 | (operator_signature)) @definition.method 32 | 33 | (method_signature) @definition.method 34 | 35 | (mixin_declaration 36 | (mixin) 37 | (identifier) @name.definition.mixin) @definition.mixin 38 | 39 | (extension_declaration 40 | name: (identifier) @name.definition.extension) @definition.extension 41 | 42 | (enum_declaration 43 | name: (identifier) @name.definition.enum) @definition.enum 44 | 45 | (function_signature 46 | name: (identifier) @name.definition.function) @definition.function 47 | 48 | (new_expression 49 | (type_identifier) @name.reference.class) @reference.class 50 | 51 | (initialized_variable_definition 52 | name: (identifier) 53 | value: (identifier) @name.reference.class 54 | value: (selector 55 | "!"? 56 | (argument_part 57 | (arguments 58 | (argument)*))?)?) @reference.class 59 | 60 | (assignment_expression 61 | left: (assignable_expression 62 | (identifier) 63 | (unconditional_assignable_selector 64 | "." 65 | (identifier) @name.reference.call))) @reference.call 66 | 67 | (assignment_expression 68 | left: (assignable_expression 69 | (identifier) 70 | (conditional_assignable_selector 71 | "?." 72 | (identifier) @name.reference.call))) @reference.call 73 | 74 | ((identifier) @name 75 | (selector 76 | "!"? 77 | (conditional_assignable_selector 78 | "?." (identifier) @name.reference.call)? 79 | (unconditional_assignable_selector 80 | "."? (identifier) @name.reference.call)? 81 | (argument_part 82 | (arguments 83 | (argument)*))?)* 84 | (cascade_section 85 | (cascade_selector 86 | (identifier)) @name.reference.call 87 | (argument_part 88 | (arguments 89 | (argument)*))?)?) @reference.call 90 | 91 | 92 | -------------------------------------------------------------------------------- /queries/tree-sitter-language-pack/javascript-tags.scm: -------------------------------------------------------------------------------- 1 | ( 2 | (comment)* @doc 3 | . 4 | (method_definition 5 | name: (property_identifier) @name.definition.method) @definition.method 6 | (#not-eq? @name.definition.method "constructor") 7 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") 8 | (#select-adjacent! @doc @definition.method) 9 | ) 10 | 11 | ( 12 | (comment)* @doc 13 | . 14 | [ 15 | (class 16 | name: (_) @name.definition.class) 17 | (class_declaration 18 | name: (_) @name.definition.class) 19 | ] @definition.class 20 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") 21 | (#select-adjacent! @doc @definition.class) 22 | ) 23 | 24 | ( 25 | (comment)* @doc 26 | . 27 | [ 28 | (function_expression 29 | name: (identifier) @name.definition.function) 30 | (function_declaration 31 | name: (identifier) @name.definition.function) 32 | (generator_function 33 | name: (identifier) @name.definition.function) 34 | (generator_function_declaration 35 | name: (identifier) @name.definition.function) 36 | ] @definition.function 37 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") 38 | (#select-adjacent! @doc @definition.function) 39 | ) 40 | 41 | ( 42 | (comment)* @doc 43 | . 44 | (lexical_declaration 45 | (variable_declarator 46 | name: (identifier) @name.definition.function 47 | value: [(arrow_function) (function_expression)]) @definition.function) 48 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") 49 | (#select-adjacent! @doc @definition.function) 50 | ) 51 | 52 | ( 53 | (comment)* @doc 54 | . 55 | (variable_declaration 56 | (variable_declarator 57 | name: (identifier) @name.definition.function 58 | value: [(arrow_function) (function_expression)]) @definition.function) 59 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$") 60 | (#select-adjacent! @doc @definition.function) 61 | ) 62 | 63 | (assignment_expression 64 | left: [ 65 | (identifier) @name.definition.function 66 | (member_expression 67 | property: (property_identifier) @name.definition.function) 68 | ] 69 | right: [(arrow_function) (function_expression)] 70 | ) @definition.function 71 | 72 | (pair 73 | key: (property_identifier) @name.definition.function 74 | value: [(arrow_function) (function_expression)]) @definition.function 75 | 76 | ( 77 | (call_expression 78 | function: (identifier) @name.reference.call) @reference.call 79 | (#not-match? @name.reference.call "^(require)$") 80 | ) 81 | 82 | (call_expression 83 | function: (member_expression 84 | property: (property_identifier) @name.reference.call) 85 | arguments: (_) @reference.call) 86 | 87 | (new_expression 88 | constructor: (_) @name.reference.class) @reference.class 89 | -------------------------------------------------------------------------------- /REPLIT_SETUP_COMPLETE.md: -------------------------------------------------------------------------------- 1 | # ✅ Emigo Setup Complete on Replit 2 | 3 | ## Summary 4 | 5 | This Emigo project has been successfully set up on Replit. All Python dependencies are installed and validated. 6 | 7 | ## What Was Done 8 | 9 | ### 1. Python Environment 10 | - ✅ Installed Python 3.11 11 | - ✅ Installed all dependencies from requirements.txt: 12 | - litellm (LLM provider integration) 13 | - epc (Emacs-Python communication) 14 | - networkx, scipy (graph/math operations) 15 | - tiktoken (token counting) 16 | - grep-ast, tree-sitter (code parsing) 17 | - And 30+ other dependencies 18 | 19 | ### 2. Project Configuration 20 | - ✅ Created `.gitignore` with Python and Emacs patterns 21 | - ✅ Created `replit.md` with project documentation 22 | - ✅ Created `test_setup.py` validation script 23 | - ✅ Created `README_REPLIT.md` usage guide 24 | 25 | ### 3. Validation 26 | - ✅ All 11 Python dependencies import successfully 27 | - ✅ All 11 core modules pass syntax validation 28 | - ✅ Setup validation workflow runs successfully 29 | 30 | ## Current Status 31 | 32 | **The project is ready to use!** 33 | 34 | ### For Emacs Users: 35 | 1. Clone this repository to your local machine 36 | 2. Install in Emacs using straight.el (see README_REPLIT.md) 37 | 3. Configure your API keys 38 | 4. Run `M-x emigo` in your project 39 | 40 | ### For Developers on Replit: 41 | 1. Click "Run" to see validation test results 42 | 2. Modify Python backend files as needed 43 | 3. Test with: `python3 test_setup.py` 44 | 45 | ## Important Notes 46 | 47 | ⚠️ **This is an Emacs plugin, not a web application** 48 | - No web server runs on port 5000 49 | - Requires Emacs to function 50 | - Python backend communicates with Emacs via EPC 51 | 52 | ## Files Added for Replit 53 | 54 | 1. `replit.md` - Project overview and architecture 55 | 2. `test_setup.py` - Setup validation script 56 | 3. `README_REPLIT.md` - Replit-specific usage guide 57 | 4. `REPLIT_SETUP_COMPLETE.md` - This summary 58 | 5. `.gitignore` - Updated with Python/Emacs patterns 59 | 60 | ## Validation Results 61 | 62 | ``` 63 | Testing Python dependencies... ✅ 11/11 OK 64 | Testing Emigo core modules... ✅ 11/11 OK 65 | 66 | ✅ SETUP VALIDATION PASSED 67 | ``` 68 | 69 | All modules loaded: 70 | - epc, networkx, pygments, grep_ast, diskcache 71 | - tiktoken, tqdm, gitignore_parser, scipy 72 | - litellm, orjson 73 | 74 | All code validated: 75 | - config.py, utils.py, session.py, tools.py 76 | - tool_definitions.py, agent.py, llm.py 77 | - llm_providers.py, llm_worker.py 78 | - repomapper.py, emigo.py 79 | 80 | ## Next Steps 81 | 82 | Choose your path: 83 | 84 | **A. Use in Emacs** (recommended) 85 | - See README_REPLIT.md for installation 86 | 87 | **B. Develop on Replit** 88 | - All Python dependencies are ready 89 | - Make changes and test with validation script 90 | 91 | **C. Install Emacs on Replit** (advanced) 92 | ```bash 93 | nix-env -iA nixpkgs.emacs 94 | ``` 95 | 96 | --- 97 | 98 | Setup completed: October 13, 2025 99 | -------------------------------------------------------------------------------- /llm_providers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Handles formatting tool definitions for specific LLM providers. 6 | 7 | Currently supports formatting for OpenAI's function/tool calling API. 8 | Can be extended to support other providers like Anthropic, Google Gemini, etc. 9 | """ 10 | 11 | from typing import List, Dict, Any 12 | from tool_definitions import ToolDefinition 13 | 14 | def format_tools_for_openai(tools: List[ToolDefinition]) -> List[Dict[str, Any]]: 15 | """ 16 | Formats a list of ToolDefinition objects into the JSON structure 17 | expected by OpenAI's Chat Completions API (for tool_choice='auto'). 18 | """ 19 | openai_tools = [] 20 | for tool in tools: 21 | properties = {} 22 | required_params = [] 23 | for param in tool.get('parameters', []): # Use .get for safety 24 | # Basic JSON schema type mapping 25 | param_type = param.get('type', 'string') # Default to string if missing 26 | properties[param['name']] = { 27 | "type": param_type, 28 | "description": param.get('description', '') # Default description 29 | } 30 | if param_type == "array": 31 | properties[param['name']]['items'] = param.get('items', {"type": "string"}) 32 | 33 | if param.get('required', False): # Default to not required 34 | required_params.append(param['name']) 35 | 36 | openai_tools.append({ 37 | "type": "function", 38 | "function": { 39 | "name": tool.get('name', 'unknown_tool'), # Default name 40 | "description": tool.get('description', ''), # Default description 41 | "parameters": { 42 | "type": "object", 43 | "properties": properties, 44 | "required": required_params 45 | } 46 | } 47 | }) 48 | return openai_tools 49 | 50 | # --- Add formatters for other providers as needed --- 51 | # def format_tools_for_anthropic(tools: List[ToolDefinition]) -> List[Dict[str, Any]]: 52 | # # Implementation for Anthropic's tool format 53 | # pass 54 | 55 | # def format_tools_for_google(tools: List[ToolDefinition]) -> List[Dict[str, Any]]: 56 | # # Implementation for Google Gemini's tool format 57 | # pass 58 | 59 | # --- Provider Selection Logic (Example) --- 60 | # You might have logic elsewhere to choose the correct formatter based on the LLM model name 61 | def get_formatted_tools(tools: List[ToolDefinition], model_name: str) -> List[Dict[str, Any]]: 62 | """Selects the appropriate formatter based on the model name.""" 63 | # Simple example: default to OpenAI format 64 | # Add more sophisticated logic if supporting multiple providers 65 | if "claude" in model_name.lower(): 66 | # return format_tools_for_anthropic(tools) 67 | pass # Placeholder 68 | elif "gemini" in model_name.lower(): 69 | # return format_tools_for_google(tools) 70 | pass # Placeholder 71 | else: # Default to OpenAI 72 | return format_tools_for_openai(tools) 73 | 74 | # Fallback if no specific provider matched 75 | return format_tools_for_openai(tools) 76 | -------------------------------------------------------------------------------- /queries/tree-sitter-languages/ocaml-tags.scm: -------------------------------------------------------------------------------- 1 | ; Modules 2 | ;-------- 3 | 4 | ( 5 | (comment)? @doc . 6 | (module_definition (module_binding (module_name) @name.definition.module) @definition.module) 7 | (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$") 8 | ) 9 | 10 | (module_path (module_name) @name.reference.module) @reference.module 11 | 12 | ; Module types 13 | ;-------------- 14 | 15 | ( 16 | (comment)? @doc . 17 | (module_type_definition (module_type_name) @name.definition.interface) @definition.interface 18 | (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$") 19 | ) 20 | 21 | (module_type_path (module_type_name) @name.reference.implementation) @reference.implementation 22 | 23 | ; Functions 24 | ;---------- 25 | 26 | ( 27 | (comment)? @doc . 28 | (value_definition 29 | [ 30 | (let_binding 31 | pattern: (value_name) @name.definition.function 32 | (parameter)) 33 | (let_binding 34 | pattern: (value_name) @name.definition.function 35 | body: [(fun_expression) (function_expression)]) 36 | ] @definition.function 37 | ) 38 | (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$") 39 | ) 40 | 41 | ( 42 | (comment)? @doc . 43 | (external (value_name) @name.definition.function) @definition.function 44 | (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$") 45 | ) 46 | 47 | (application_expression 48 | function: (value_path (value_name) @name.reference.call)) @reference.call 49 | 50 | (infix_expression 51 | left: (value_path (value_name) @name.reference.call) 52 | operator: (concat_operator) @reference.call 53 | (#eq? @reference.call "@@")) 54 | 55 | (infix_expression 56 | operator: (rel_operator) @reference.call 57 | right: (value_path (value_name) @name.reference.call) 58 | (#eq? @reference.call "|>")) 59 | 60 | ; Operator 61 | ;--------- 62 | 63 | ( 64 | (comment)? @doc . 65 | (value_definition 66 | (let_binding 67 | pattern: (parenthesized_operator (_) @name.definition.function)) @definition.function) 68 | (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$") 69 | ) 70 | 71 | [ 72 | (prefix_operator) 73 | (sign_operator) 74 | (pow_operator) 75 | (mult_operator) 76 | (add_operator) 77 | (concat_operator) 78 | (rel_operator) 79 | (and_operator) 80 | (or_operator) 81 | (assign_operator) 82 | (hash_operator) 83 | (indexing_operator) 84 | (let_operator) 85 | (let_and_operator) 86 | (match_operator) 87 | ] @name.reference.call @reference.call 88 | 89 | ; Classes 90 | ;-------- 91 | 92 | ( 93 | (comment)? @doc . 94 | [ 95 | (class_definition (class_binding (class_name) @name.definition.class) @definition.class) 96 | (class_type_definition (class_type_binding (class_type_name) @name.definition.class) @definition.class) 97 | ] 98 | (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$") 99 | ) 100 | 101 | [ 102 | (class_path (class_name) @name.reference.class) 103 | (class_type_path (class_type_name) @name.reference.class) 104 | ] @reference.class 105 | 106 | ; Methods 107 | ;-------- 108 | 109 | ( 110 | (comment)? @doc . 111 | (method_definition (method_name) @name.definition.method) @definition.method 112 | (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$") 113 | ) 114 | 115 | (method_invocation (method_name) @name.reference.call) @reference.call 116 | -------------------------------------------------------------------------------- /replit.md: -------------------------------------------------------------------------------- 1 | # Emigo - AI Assistant for Emacs 2 | 3 | ## Project Overview 4 | 5 | **Emigo** is an intelligent, agentic Emacs-native AI assistant that understands and interacts with your codebase. It's designed to run as a Python backend that communicates with Emacs via EPC (Emacs Process Communication). 6 | 7 | **Key Features:** 8 | - Agentic tool use - interacts with your environment based on LLM reasoning 9 | - Native Emacs integration 10 | - Flexible LLM support via LiteLLM (OpenRouter, Deepseek, etc.) 11 | - Context-aware interactions with chat history and project context 12 | 13 | ## Project Type 14 | 15 | This is **NOT a web application** - it's an Emacs plugin with a Python backend. The Python code (`emigo.py`) runs as an EPC server that Emacs connects to for AI-powered coding assistance. 16 | 17 | ## Architecture 18 | 19 | ### Main Components: 20 | 1. **emigo.el** - Emacs Lisp frontend 21 | 2. **emigo.py** - Python EPC orchestrator (main backend) 22 | 3. **llm_worker.py** - LLM interaction subprocess 23 | 4. **session.py** - Session management 24 | 5. **tools.py** - Tool implementations (read_file, write_file, execute_command, etc.) 25 | 6. **repomapper.py** - Repository mapping for context 26 | 27 | ### How It Works: 28 | 1. Emacs starts the Python backend (`emigo.py`) as a subprocess 29 | 2. Python starts an EPC server on a random port 30 | 3. Emacs connects to the EPC server 31 | 4. User interacts via Emacs buffers 32 | 5. LLM worker handles AI interactions and tool execution 33 | 34 | ## Replit Setup Status 35 | 36 | ✅ **Python 3.11 installed** 37 | ✅ **All Python dependencies installed** (litellm, epc, networkx, etc.) 38 | ✅ **Project is ready to use** 39 | 40 | ## Usage in Emacs 41 | 42 | This project is designed to be used from within Emacs: 43 | 44 | 1. **Install in Emacs** using straight.el: 45 | ```emacs-lisp 46 | (use-package emigo 47 | :straight (:host github :repo "MatthewZMD/emigo" :files (:defaults "*.py" "*.el")) 48 | :config 49 | (emigo-enable) 50 | :custom 51 | (emigo-model "openrouter/deepseek/deepseek-chat-v3-0324") 52 | (emigo-base-url "https://openrouter.ai/api/v1") 53 | (emigo-api-key (getenv "OPENROUTER_API_KEY"))) 54 | ``` 55 | 56 | 2. **Basic Usage:** 57 | - `M-x emigo` - Start Emigo in your project 58 | - Type prompts and press `C-c C-c` to send 59 | - Use `@filename` to add files to context 60 | - `C-c f` - Add files interactively 61 | - `C-c l` - List files in context 62 | - `C-c H` - Clear chat history 63 | 64 | ## Testing on Replit 65 | 66 | Since this is an Emacs plugin and Replit doesn't have Emacs pre-installed, you can: 67 | 68 | 1. **Validate Python Setup:** 69 | ```bash 70 | python3 --version # Check Python 71 | pip list | grep -E "(litellm|epc)" # Verify dependencies 72 | ``` 73 | 74 | 2. **Check Code Structure:** 75 | ```bash 76 | python3 -m py_compile emigo.py # Syntax check 77 | ``` 78 | 79 | 3. **Install Emacs (optional):** 80 | If you want to actually run Emigo in Replit, you would need to install Emacs first. 81 | 82 | ## Development Notes 83 | 84 | - Written in Python 3.x (tested with 3.11) 85 | - Uses EPC for Emacs-Python communication 86 | - LLM interactions via LiteLLM (supports multiple providers) 87 | - Active development - expect breaking changes 88 | 89 | ## Recent Setup 90 | 91 | - 2025-10-13: Imported to Replit, installed Python 3.11 and all dependencies 92 | -------------------------------------------------------------------------------- /test_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Test script to validate Emigo setup on Replit. 4 | Since Emigo is an Emacs plugin, this script validates that: 5 | 1. All Python dependencies are installed 6 | 2. Core modules can be imported 7 | 3. Configuration is valid 8 | """ 9 | 10 | import sys 11 | import importlib 12 | 13 | def test_imports(): 14 | """Test that all required modules can be imported.""" 15 | required_modules = [ 16 | 'epc', 17 | 'networkx', 18 | 'pygments', 19 | 'grep_ast', 20 | 'diskcache', 21 | 'tiktoken', 22 | 'tqdm', 23 | 'gitignore_parser', 24 | 'scipy', 25 | 'litellm', 26 | 'orjson', 27 | ] 28 | 29 | print("Testing Python dependencies...") 30 | print("-" * 50) 31 | 32 | failed = [] 33 | for module in required_modules: 34 | try: 35 | importlib.import_module(module) 36 | print(f"✓ {module:20s} - OK") 37 | except ImportError as e: 38 | print(f"✗ {module:20s} - FAILED: {e}") 39 | failed.append(module) 40 | 41 | print("-" * 50) 42 | if failed: 43 | print(f"\n❌ {len(failed)} module(s) failed to import: {', '.join(failed)}") 44 | return False 45 | else: 46 | print("\n✅ All dependencies imported successfully!") 47 | return True 48 | 49 | def test_core_modules(): 50 | """Test that Emigo core modules are valid.""" 51 | print("\nTesting Emigo core modules...") 52 | print("-" * 50) 53 | 54 | core_modules = [ 55 | 'config', 56 | 'utils', 57 | 'session', 58 | 'tools', 59 | 'tool_definitions', 60 | 'agent', 61 | 'llm', 62 | 'llm_providers', 63 | 'llm_worker', 64 | 'repomapper', 65 | 'emigo', 66 | ] 67 | 68 | failed = [] 69 | for module in core_modules: 70 | try: 71 | # Just compile, don't import (to avoid EPC connection issues) 72 | with open(f"{module}.py", 'r') as f: 73 | compile(f.read(), f"{module}.py", 'exec') 74 | print(f"✓ {module}.py - Syntax OK") 75 | except Exception as e: 76 | print(f"✗ {module}.py - FAILED: {e}") 77 | failed.append(module) 78 | 79 | print("-" * 50) 80 | if failed: 81 | print(f"\n❌ {len(failed)} module(s) have issues: {', '.join(failed)}") 82 | return False 83 | else: 84 | print("\n✅ All core modules are syntactically valid!") 85 | return True 86 | 87 | def main(): 88 | print("=" * 50) 89 | print("Emigo Setup Validation Test") 90 | print("=" * 50) 91 | print() 92 | 93 | # Test dependencies 94 | deps_ok = test_imports() 95 | 96 | # Test core modules 97 | core_ok = test_core_modules() 98 | 99 | print("\n" + "=" * 50) 100 | if deps_ok and core_ok: 101 | print("✅ SETUP VALIDATION PASSED") 102 | print("\nEmigo is ready to use from Emacs!") 103 | print("\nNote: This is an Emacs plugin. To use it:") 104 | print("1. Install in Emacs using straight.el") 105 | print("2. Configure API keys (OPENROUTER_API_KEY, etc.)") 106 | print("3. Run M-x emigo in your project") 107 | return 0 108 | else: 109 | print("❌ SETUP VALIDATION FAILED") 110 | print("\nSome components need attention.") 111 | return 1 112 | print("=" * 50) 113 | 114 | if __name__ == "__main__": 115 | sys.exit(main()) 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 | # Emigo: Future of Agentic Development in Emacs 6 | 7 | Emigo intends to be an intelligent, agentic Emacs-native AI assistant that understands and interacts with your codebase. Brought to you by the creators of [Emacs Application Framework](https://github.com/emacs-eaf/emacs-application-framework) and [lsp-bridge](https://github.com/manateelazycat/lsp-bridge), built on the shoulders of giants like [Cursor](https://www.cursor.com/en), [Aider](https://github.com/paul-gauthier/aider) and [Cline](https://github.com/sturdy-dev/cline), Emigo is the spiritual successor to [Aidermacs](https://github.com/MatthewZMD/aidermacs), reimagined from the ground up for Emacs. 8 | 9 | ## ⚠️ Emigo is under *very* active development, experimentation, and rapid-prototyping ⚠️ 10 | 11 | The project is at its early-stage. Expect frequent breaking changes and unstable features. Please use only for testing, try it out and report issues - your feedback helps shape Emigo! 12 | 13 | ## Key Features 14 | 15 | * **Agentic Tool Use:** Emigo doesn't just generate text; it uses tools to interact with your environment based on the LLM's reasoning. 16 | * **Emacs Integration:** Designed to feel native within Emacs, leveraging familiar interfaces and workflows. 17 | * **Flexible LLM Support:** Connects to various LLM providers through [LiteLLM](https://github.com/BerriAI/litellm), allowing you to choose the model that best suits your needs. 18 | * **Context-Aware Interactions:** Manages chat history and project context for coherent sessions. 19 | 20 | ## Installation 21 | 22 | 1. **Prerequisites:** 23 | * Emacs 28 or higher. 24 | * Python 3.x. 25 | 2. **Install Python Dependencies:** 26 | ```bash 27 | pip install -r requirements.txt 28 | ``` 29 | 3. **Install with straight.el:** Add to your Emacs config: 30 | 31 | ```emacs-lisp 32 | (use-package emigo 33 | :straight (:host github :repo "MatthewZMD/emigo" :files (:defaults "*.py" "*.el")) 34 | :config 35 | (emigo-enable) ;; Starts the background process automatically 36 | :custom 37 | ;; Encourage using OpenRouter with Deepseek 38 | (emigo-model "openrouter/deepseek/deepseek-chat-v3-0324") 39 | (emigo-base-url "https://openrouter.ai/api/v1") 40 | (emigo-api-key (getenv "OPENROUTER_API_KEY"))) 41 | ``` 42 | 43 | ## Usage 44 | 45 | ### Basic Interaction 46 | 1. **Start Emigo:** Navigate to your project directory (or any directory you want to work in) and run `M-x emigo`. 47 | 2. **Interact:** Emigo will open a dedicated buffer. The AI will respond, potentially using tools. You might be asked for approval for certain actions (like running commands or writing files). 48 | 3. **Send Prompts:** Type your prompt and press `C-c C-c` or `C-m` to send it to Emigo. 49 | 50 | ### Context Management 51 | - **Add Files:** 52 | - Mention files in your prompt using `@` (e.g., `Refactor @src/utils.py`) 53 | - Or use `C-c f` to interactively add files 54 | - **List Files in Context:** `C-c l` 55 | - **Remove Files from Context:** `C-c j` 56 | - **Clear Chat History:** `C-c H` 57 | - **View History:** `C-c h` (shows in Org mode buffer) 58 | 59 | ### Keybindings 60 | #### Core Commands 61 | - `C-c C-c` / `C-m` - Send current prompt 62 | - `C-c r` - Restart Emigo process 63 | - `C-c k` - Cancel current interaction 64 | 65 | Note: Emigo manages sessions based on the directory where you invoke `M-x emigo`. If invoked within a Git repository, the repository root is typically used as the session path. Use `C-u M-x emigo` to force the session path to be the current `default-directory`. 66 | 67 | ## Understanding Tool Use 68 | 69 | The core of Emigo's power lies in its agentic tool use. Instead of just providing code suggestions, the LLM analyzes your request and decides which actions (tools) are necessary to accomplish the task. 70 | 71 | 1. **LLM Reasoning:** Based on your prompt and the current context, the LLM determines the next step. 72 | 2. **Tool Selection:** It chooses an appropriate tool, such as `read_file`, `write_to_file`, `replace_in_file`, `execute_command`, `list_files`, `list_repomap`, or `ask_followup_question`. 73 | 3. **Tool Execution:** Emigo executes the chosen tool, potentially asking for your approval for sensitive operations. 74 | 4. **Result Feedback:** The result of the tool execution (e.g., file content, command output, error message) is fed back into the conversation history. 75 | 5. **Iteration:** The LLM uses this new information to decide the next step, continuing the cycle until the task is complete or requires further input. 76 | 77 | This iterative process allows Emigo to tackle more complex tasks that involve multiple steps and interactions with your project files and system. The LLM uses an XML format to specify the tool and its parameters. 78 | -------------------------------------------------------------------------------- /README_REPLIT.md: -------------------------------------------------------------------------------- 1 | # Emigo on Replit - Setup Guide 2 | 3 | ## What is Emigo? 4 | 5 | **Emigo** is an AI coding assistant for Emacs - it's an intelligent, agentic plugin that understands and interacts with your codebase directly within Emacs. 6 | 7 | ⚠️ **Important:** This is **NOT a web application**. It's an Emacs editor plugin with a Python backend. 8 | 9 | ## What's Set Up on Replit 10 | 11 | ✅ **Python 3.11** - Installed and configured 12 | ✅ **All Dependencies** - litellm, epc, networkx, pygments, and all other requirements 13 | ✅ **Code Validation** - All modules pass syntax checks 14 | ✅ **Test Workflow** - Validation test runs successfully 15 | 16 | ## How to Use This Project 17 | 18 | ### Option 1: Use in Your Own Emacs Setup 19 | 20 | 1. **Clone this repo to your local machine:** 21 | ```bash 22 | git clone 23 | cd emigo 24 | ``` 25 | 26 | 2. **Ensure Python dependencies are installed:** 27 | ```bash 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | 3. **Install in Emacs** using straight.el: 32 | ```emacs-lisp 33 | (use-package emigo 34 | :straight (:host github :repo "MatthewZMD/emigo" :files (:defaults "*.py" "*.el")) 35 | :config 36 | (emigo-enable) 37 | :custom 38 | (emigo-model "openrouter/deepseek/deepseek-chat-v3-0324") 39 | (emigo-base-url "https://openrouter.ai/api/v1") 40 | (emigo-api-key (getenv "OPENROUTER_API_KEY"))) 41 | ``` 42 | 43 | 4. **Set up your API key** (e.g., for OpenRouter): 44 | ```bash 45 | export OPENROUTER_API_KEY="your-api-key-here" 46 | ``` 47 | 48 | 5. **Use in Emacs:** 49 | - Open your project in Emacs 50 | - Run `M-x emigo` 51 | - Start coding with AI assistance! 52 | 53 | ### Option 2: Test/Develop on Replit 54 | 55 | Since Emacs isn't pre-installed on Replit, you can: 56 | 57 | 1. **Validate the setup** (already configured): 58 | - Click the "Run" button to see the validation test 59 | - All dependencies and modules should show ✓ OK 60 | 61 | 2. **Install Emacs on Replit** (optional, for testing): 62 | ```bash 63 | # Install Emacs 64 | nix-env -iA nixpkgs.emacs 65 | 66 | # Verify installation 67 | emacs --version 68 | ``` 69 | 70 | 3. **Develop/modify the Python backend:** 71 | - All Python files are ready for development 72 | - Core modules: `emigo.py`, `llm_worker.py`, `session.py`, `tools.py` 73 | - Test your changes with: `python3 test_setup.py` 74 | 75 | ## Key Features 76 | 77 | - 🤖 **Agentic Tool Use** - LLM can execute tools (read/write files, run commands) 78 | - 🔌 **Flexible LLM Support** - Works with OpenRouter, Deepseek, OpenAI, and more 79 | - 📝 **Context-Aware** - Maintains chat history and project context 80 | - 🎯 **Repository Mapping** - Understands your codebase structure 81 | 82 | ## Emacs Commands (when running in Emacs) 83 | 84 | - `M-x emigo` - Start Emigo 85 | - `C-c C-c` or `C-m` - Send prompt 86 | - `C-c f` - Add files to context 87 | - `C-c l` - List files in context 88 | - `C-c j` - Remove files from context 89 | - `C-c H` - Clear chat history 90 | - `C-c r` - Restart Emigo process 91 | 92 | ## Project Structure 93 | 94 | ``` 95 | emigo/ 96 | ├── emigo.py # Main Python backend (EPC server) 97 | ├── llm_worker.py # LLM interaction subprocess 98 | ├── emigo.el # Emacs Lisp frontend 99 | ├── emigo-epc.el # EPC client for Emacs 100 | ├── session.py # Session management 101 | ├── tools.py # Tool implementations 102 | ├── repomapper.py # Repository mapping 103 | ├── requirements.txt # Python dependencies 104 | └── test_setup.py # Validation script 105 | ``` 106 | 107 | ## Architecture 108 | 109 | 1. **Emacs** (frontend) ↔️ **EPC** ↔️ **emigo.py** (backend) 110 | 2. **emigo.py** manages **llm_worker.py** subprocess 111 | 3. **llm_worker.py** handles LLM API calls via LiteLLM 112 | 4. Tools execute in **tools.py** and report back to Emacs 113 | 114 | ## For Developers 115 | 116 | If you're contributing to Emigo: 117 | 118 | 1. **Run validation:** `python3 test_setup.py` 119 | 2. **Syntax check:** `python3 -m py_compile .py` 120 | 3. **Check imports:** `python3 -c "import "` 121 | 122 | ## Resources 123 | 124 | - **Original Repository:** [MatthewZMD/emigo](https://github.com/MatthewZMD/emigo) 125 | - **Documentation:** See main [README.md](README.md) 126 | - **Emacs Config:** See [emigo.el](emigo.el) for Elisp side 127 | 128 | ## Getting Help 129 | 130 | This project is in active development. Check the [issues page](https://github.com/MatthewZMD/emigo/issues) for known problems and feature requests. 131 | 132 | --- 133 | 134 | **Note:** Emigo requires Emacs to run. This Replit setup validates that the Python backend is correctly configured and ready to use. 135 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Central Configuration for Emigo. 6 | 7 | This module stores shared constants and configuration settings used across 8 | different parts of the Emigo Python backend. This includes lists for 9 | ignoring directories and file extensions during repository scanning, and 10 | a list defining files considered "important" at the root of a project for 11 | prioritization in the repository map. 12 | 13 | Centralizing these configurations makes them easier to manage and modify. 14 | """ 15 | 16 | import os 17 | 18 | 19 | # --- Tool Result/Error Messages --- 20 | 21 | TOOL_RESULT_SUCCESS = "Tool executed successfully." 22 | TOOL_RESULT_OUTPUT_PREFIX = "Tool output:\n" 23 | TOOL_DENIED = "The user denied this operation." 24 | TOOL_ERROR_PREFIX = "[Tool Error] " 25 | TOOL_ERROR_SUFFIX = "" 26 | 27 | 28 | # --- Ignored Directories --- 29 | # Used in agents.py (_get_environment_details) and repomapper.py (_find_src_files) 30 | # Combine common ignored directories from both places. 31 | IGNORED_DIRS = [ 32 | r'^\.emigo_repomap$', 33 | r'^\.aider.*$', 34 | r'^\.(git|hg|svn)$', # Version control 35 | r'^__pycache__$', # Python cache 36 | r'^node_modules$', # Node.js dependencies 37 | r'^(\.venv|venv|\.env|env)$', # Virtual environments 38 | r'^(build|dist)$', # Build artifacts 39 | r'^vendor$' # Vendor dependencies (common in some languages) 40 | ] 41 | 42 | # --- Ignored File Extensions (Binary/Non-Source) --- 43 | # Used in repomapper.py (_find_src_files) 44 | BINARY_EXTS = { 45 | # Images 46 | '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.ico', '.svg', 47 | # Media 48 | '.mp3', '.mp4', '.mov', '.avi', '.mkv', '.wav', 49 | # Archives 50 | '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar', 51 | # Documents 52 | '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', 53 | # Other binaries 54 | '.exe', '.dll', '.so', '.o', '.a', '.class', '.jar', 55 | # Logs/Temp 56 | '.log', '.tmp', '.swp' 57 | } 58 | 59 | # --- Important Files (Root Level) --- 60 | # Used in repomapper.py (is_important) 61 | # List of filenames/paths considered important at the root of a project. 62 | ROOT_IMPORTANT_FILES_LIST = [ 63 | # Version Control 64 | ".gitignore", 65 | ".gitattributes", 66 | # Documentation 67 | "README", 68 | "README.md", 69 | "README.txt", 70 | "README.rst", 71 | "CONTRIBUTING", 72 | "CONTRIBUTING.md", 73 | "CONTRIBUTING.txt", 74 | "CONTRIBUTING.rst", 75 | "LICENSE", 76 | "LICENSE.md", 77 | "LICENSE.txt", 78 | "CHANGELOG", 79 | "CHANGELOG.md", 80 | "CHANGELOG.txt", 81 | "CHANGELOG.rst", 82 | "SECURITY", 83 | "SECURITY.md", 84 | "SECURITY.txt", 85 | "CODEOWNERS", 86 | # Package Management and Dependencies 87 | "requirements.txt", 88 | "Pipfile", 89 | "Pipfile.lock", 90 | "pyproject.toml", 91 | "setup.py", 92 | "setup.cfg", 93 | "package.json", 94 | "package-lock.json", 95 | "yarn.lock", 96 | "npm-shrinkwrap.json", 97 | "Gemfile", 98 | "Gemfile.lock", 99 | "composer.json", 100 | "composer.lock", 101 | "pom.xml", 102 | "build.gradle", 103 | "build.gradle.kts", 104 | "build.sbt", 105 | "go.mod", 106 | "go.sum", 107 | "Cargo.toml", 108 | "Cargo.lock", 109 | "mix.exs", 110 | "rebar.config", 111 | "project.clj", 112 | "Podfile", 113 | "Cartfile", 114 | "dub.json", 115 | "dub.sdl", 116 | # Configuration and Settings 117 | ".env", 118 | ".env.example", 119 | ".editorconfig", 120 | "tsconfig.json", 121 | "jsconfig.json", 122 | ".babelrc", 123 | "babel.config.js", 124 | ".eslintrc", 125 | ".eslintignore", 126 | ".prettierrc", 127 | ".stylelintrc", 128 | "tslint.json", 129 | ".pylintrc", 130 | ".flake8", 131 | ".rubocop.yml", 132 | ".scalafmt.conf", 133 | ".dockerignore", 134 | ".gitpod.yml", 135 | "sonar-project.properties", 136 | "renovate.json", 137 | "dependabot.yml", 138 | ".pre-commit-config.yaml", 139 | "mypy.ini", 140 | "tox.ini", 141 | ".yamllint", 142 | "pyrightconfig.json", 143 | # Build and Compilation 144 | "webpack.config.js", 145 | "rollup.config.js", 146 | "parcel.config.js", 147 | "gulpfile.js", 148 | "Gruntfile.js", 149 | "build.xml", 150 | "build.boot", 151 | "project.json", 152 | "build.cake", 153 | "MANIFEST.in", 154 | # Testing 155 | "pytest.ini", 156 | "phpunit.xml", 157 | "karma.conf.js", 158 | "jest.config.js", 159 | "cypress.json", 160 | ".nycrc", 161 | ".nycrc.json", 162 | # CI/CD 163 | ".travis.yml", 164 | ".gitlab-ci.yml", 165 | "Jenkinsfile", 166 | "azure-pipelines.yml", 167 | "bitbucket-pipelines.yml", 168 | "appveyor.yml", 169 | "circle.yml", 170 | ".circleci/config.yml", 171 | ".github/dependabot.yml", 172 | "codecov.yml", 173 | ".coveragerc", 174 | # Docker and Containers 175 | "Dockerfile", 176 | "docker-compose.yml", 177 | "docker-compose.override.yml", 178 | # Cloud and Serverless 179 | "serverless.yml", 180 | "firebase.json", 181 | "now.json", 182 | "netlify.toml", 183 | "vercel.json", 184 | "app.yaml", 185 | "terraform.tf", 186 | "main.tf", 187 | "cloudformation.yaml", 188 | "cloudformation.json", 189 | "ansible.cfg", 190 | "kubernetes.yaml", 191 | "k8s.yaml", 192 | # Database 193 | "schema.sql", 194 | "liquibase.properties", 195 | "flyway.conf", 196 | # Framework-specific 197 | "next.config.js", 198 | "nuxt.config.js", 199 | "vue.config.js", 200 | "angular.json", 201 | "gatsby-config.js", 202 | "gridsome.config.js", 203 | # API Documentation 204 | "swagger.yaml", 205 | "swagger.json", 206 | "openapi.yaml", 207 | "openapi.json", 208 | # Development environment 209 | ".nvmrc", 210 | ".ruby-version", 211 | ".python-version", 212 | "Vagrantfile", 213 | # Quality and metrics 214 | ".codeclimate.yml", 215 | "codecov.yml", 216 | # Documentation 217 | "mkdocs.yml", 218 | "_config.yml", 219 | "book.toml", 220 | "readthedocs.yml", 221 | ".readthedocs.yaml", 222 | # Package registries 223 | ".npmrc", 224 | ".yarnrc", 225 | # Linting and formatting 226 | ".isort.cfg", 227 | ".markdownlint.json", 228 | ".markdownlint.yaml", 229 | # Security 230 | ".bandit", 231 | ".secrets.baseline", 232 | # Misc 233 | ".pypirc", 234 | ".gitkeep", 235 | ".npmignore", 236 | ] 237 | 238 | # Normalize the list once into a set for efficient lookup 239 | NORMALIZED_ROOT_IMPORTANT_FILES = set(os.path.normpath(path) for path in ROOT_IMPORTANT_FILES_LIST) 240 | -------------------------------------------------------------------------------- /agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Defines the Agent class responsible for the core logic of interacting with the LLM. 6 | 7 | This module contains the `Agent` class which encapsulates the agentic loop behavior. 8 | It constructs prompts, processes LLM responses, and determines when to use tools. 9 | """ 10 | 11 | import json # Keep for parsing LLM responses if needed 12 | import os 13 | import sys 14 | from typing import List, Dict, Optional 15 | 16 | from llm import LLMClient 17 | from repomapper import RepoMapper # Keep for agent's internal use if needed (e.g., environment details) 18 | # Import tool definitions and provider formatting 19 | from tool_definitions import get_all_tools 20 | from llm_providers import get_formatted_tools 21 | # Import only the base system prompt template 22 | from system_prompt import MAIN_SYSTEM_PROMPT 23 | import tiktoken # For token counting 24 | 25 | from utils import ( 26 | get_os_name, 27 | eval_in_emacs 28 | ) 29 | 30 | class Agent: 31 | """ 32 | Manages the agentic interaction loop for a given session. 33 | 34 | This class is instantiated by `llm_worker.py` for each interaction. It takes the 35 | current session state (prompt, history, context) and orchestrates the 36 | conversation with the LLM. 37 | 38 | Key Responsibilities: 39 | - Building the system prompt, incorporating dynamic information like the 40 | current working directory and OS details. 41 | - Preparing the full message list for the LLM, including the system prompt, 42 | truncated history, and environment details (provided by the worker). 43 | - Managing history truncation logic to stay within token limits. 44 | - Parsing LLM responses to identify tool usage requests using XML-like tags. 45 | - Determining the next step in the interaction (e.g., call tool, respond directly, 46 | finish). 47 | 48 | Note: History itself is managed by the `Session` object in the main `emigo.py` 49 | process and passed to the worker for each interaction. Tool implementations 50 | reside in `tools.py` and are executed via the main `emigo.py` process. 51 | """ 52 | 53 | def __init__(self, session_path: str, llm_client: LLMClient, chat_files_ref: Dict[str, List[str]], verbose: bool = False): 54 | self.session_path = session_path # This is the root directory for the session 55 | self.llm_client = llm_client 56 | self.chat_files_ref = chat_files_ref # Reference to Emigo's chat_files dict 57 | self.environment_details_str = "" # Initialize, will be updated by worker loop 58 | self.verbose = verbose 59 | # Keep RepoMapper instance, but usage is restricted 60 | self.repo_mapper = RepoMapper(root_dir=self.session_path, verbose=self.verbose) 61 | # History truncation settings 62 | self.max_history_tokens = 8000 # Target max tokens for history 63 | self.min_history_messages = 3 # Always keep at least this many messages 64 | # Tokenizer for history management 65 | try: 66 | self.tokenizer = tiktoken.get_encoding("cl100k_base") 67 | # Test the tokenizer works 68 | test_tokens = self.tokenizer.encode("test") 69 | if not test_tokens: 70 | raise ValueError("Tokenizer returned empty tokens") 71 | except Exception as e: 72 | print(f"Warning: Could not initialize tokenizer. Using simple character count fallback. Error: {e}", file=sys.stderr) 73 | self.tokenizer = None 74 | 75 | # --- Prompt Building --- 76 | 77 | def _build_system_prompt(self) -> str: 78 | """Builds the system prompt, inserting dynamic info and formatted tool list.""" 79 | session_dir = self.session_path 80 | os_name = get_os_name() 81 | shell = "/bin/bash" # Default shell - TODO: Get from Emacs? 82 | homedir = os.path.expanduser("~") 83 | 84 | # Get all tool definitions 85 | available_tools = get_all_tools() 86 | # Format tools for the specific LLM provider (e.g., OpenAI) 87 | # Assumes llm_client has model_name attribute 88 | formatted_tools = get_formatted_tools(available_tools, self.llm_client.model_name) 89 | # Convert the formatted list to a JSON string for insertion 90 | tools_json_string = json.dumps(formatted_tools, indent=2) 91 | 92 | # Use .format() on the MAIN_SYSTEM_PROMPT template 93 | prompt = MAIN_SYSTEM_PROMPT.format( 94 | session_dir=session_dir.replace(os.sep, '/'), # Ensure POSIX paths 95 | os_name=os_name, 96 | shell=shell, 97 | homedir=homedir.replace(os.sep, '/'), 98 | tools_json=tools_json_string # Insert the formatted tool definitions 99 | ) 100 | return prompt 101 | 102 | # --- LLM Prompt Preparation & History Management --- 103 | # _parse_tool_use (XML parser) is removed. Parsing now happens in llm_worker.py 104 | 105 | def _prepare_llm_prompt(self, system_prompt: str, current_interaction_history: List[Dict]) -> List[Dict]: 106 | """Prepares the list of messages for the LLM, including history truncation and environment details. 107 | Uses the provided current_interaction_history list (list of dicts). 108 | Environment details are stored in self.environment_details_str.""" 109 | # Always include system prompt 110 | messages_to_send = [{"role": "system", "content": system_prompt}] 111 | 112 | # --- History Truncation: Keep messages within token limit --- 113 | # Truncate the provided history list (already dicts) 114 | messages_to_send.extend(self._truncate_history(current_interaction_history)) 115 | 116 | # --- Append Environment Details (Stored in self.environment_details_str) --- 117 | # Use copy() to avoid modifying the history object directly 118 | last_message_copy = messages_to_send[-1].copy() 119 | last_message_copy["content"] += f"\n\n{self.environment_details_str}" # Append stored details 120 | messages_to_send[-1] = last_message_copy # Replace the last message 121 | 122 | return messages_to_send 123 | 124 | def _call_llm_and_stream_response(self, messages_to_send: List[Dict]) -> Optional[str]: 125 | """Calls the LLM, streams the response, and returns the full response text.""" 126 | full_response = "" 127 | eval_in_emacs("emigo--flush-buffer", self.session_path, "\nAssistant:\n", "llm") # Signal start 128 | try: 129 | # Send the temporary list with context included 130 | response_stream = self.llm_client.send(messages_to_send, stream=True) 131 | for chunk in response_stream: 132 | # Ensure chunk is a string, default to empty string if None 133 | content_to_flush = chunk or "" 134 | eval_in_emacs("emigo--flush-buffer", self.session_path, content_to_flush, "llm") 135 | if chunk: # Only append non-None chunks to full_response 136 | full_response += chunk 137 | return full_response 138 | except Exception as e: 139 | error_message = f"[Error during LLM communication: {e}]" 140 | print(f"\n{error_message}", file=sys.stderr) 141 | eval_in_emacs("emigo--flush-buffer", self.session_path, str(error_message), "error") 142 | # Add error to persistent history (handled in main loop now) 143 | # self.llm_client.append_history({"role": "assistant", "content": error_message}) 144 | return None # Indicate error 145 | 146 | # --- History Truncation & Token Counting --- 147 | 148 | def _truncate_history(self, history: List[Dict[str, str]]) -> List[Dict[str, str]]: 149 | """Truncate history to fit within token limits while preserving important messages.""" 150 | if not history: 151 | return [] 152 | 153 | # Always keep first user message for context 154 | truncated = [history[0]] 155 | current_tokens = self._count_tokens(truncated[0]["content"]) 156 | 157 | # Add messages from newest to oldest until we hit the limit 158 | for msg in reversed(history[1:]): 159 | msg_tokens = self._count_tokens(msg["content"]) 160 | if current_tokens + msg_tokens > self.max_history_tokens: 161 | if len(truncated) >= self.min_history_messages: 162 | break 163 | # If we're below min messages, keep going but warn 164 | print("Warning: History exceeds token limit but below min message count", file=sys.stderr) 165 | 166 | truncated.insert(1, msg) # Insert after first message 167 | current_tokens += msg_tokens 168 | 169 | if self.verbose and len(truncated) < len(history): 170 | print(f"History truncated from {len(history)} to {len(truncated)} messages ({current_tokens} tokens)", file=sys.stderr) 171 | 172 | return truncated 173 | 174 | def _count_tokens(self, text: str) -> int: 175 | """Count tokens in text using tokenizer or fallback method.""" 176 | if not text: 177 | return 0 178 | 179 | if self.tokenizer: 180 | try: 181 | return len(self.tokenizer.encode(text)) 182 | except Exception as e: 183 | print(f"Token counting error, using fallback: {e}", file=sys.stderr) 184 | 185 | # Fallback: approximate tokens as 4 chars per token 186 | return max(1, len(text) // 4) 187 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Utility functions for Emigo, primarily focused on Emacs communication. 6 | 7 | This module provides helper functions used across the Emigo Python backend. 8 | Its main role is to facilitate communication from Python back to the Emacs 9 | Lisp frontend using the EPC (Emacs Process Communication) protocol. 10 | 11 | Key Features: 12 | - Initialization and management of the EPC client connection to Emacs. 13 | - Functions (`eval_in_emacs`, `get_emacs_func_result`, `get_emacs_var`, etc.) 14 | to execute Elisp code or retrieve variables from Emacs, both synchronously 15 | and asynchronously. 16 | - Argument transformation helpers (`epc_arg_transformer`) to bridge Python 17 | data types and Elisp S-expressions. 18 | - Basic file/path utilities (`path_to_uri`, `read_file_content`). 19 | - OS detection (`get_os_name`). 20 | """ 21 | 22 | # Copyright (C) 2022 Andy Stewart 23 | # 24 | # Author: Andy Stewart 25 | # Maintainer: Andy Stewart 26 | # 27 | # This program is free software: you can redistribute it and/or modify 28 | # it under the terms of the GNU General Public License as published by 29 | # the Free Software Foundation, either version 3 of the License, or 30 | # any later version. 31 | # 32 | # This program is distributed in the hope that it will be useful, 33 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 34 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 35 | # GNU General Public License for more details. 36 | # 37 | # You should have received a copy of the GNU General Public License 38 | # along with this program. If not, see . 39 | import functools 40 | from typing import Optional 41 | from urllib.parse import urlparse 42 | 43 | import sexpdata 44 | import logging 45 | import pathlib 46 | import platform 47 | import sys 48 | import re 49 | 50 | from epc.client import EPCClient 51 | 52 | import orjson as json_parser 53 | 54 | epc_client: Optional[EPCClient] = None 55 | 56 | # initialize logging, default to STDERR and INFO level 57 | logger = logging.getLogger("emigo") 58 | logger.setLevel(logging.INFO) 59 | logger.addHandler(logging.StreamHandler()) 60 | 61 | 62 | def init_epc_client(emacs_server_port): 63 | global epc_client 64 | 65 | if epc_client is None: 66 | try: 67 | epc_client = EPCClient(("127.0.0.1", emacs_server_port), log_traceback=True) 68 | except ConnectionRefusedError: 69 | import traceback 70 | logger.error(traceback.format_exc()) 71 | 72 | 73 | def close_epc_client(): 74 | if epc_client is not None: 75 | epc_client.close() 76 | 77 | 78 | def eval_in_emacs(method_name, *args): 79 | # Construct the list for the S-expression directly with Python types 80 | sexp_list = [sexpdata.Symbol(method_name)] + list(args) 81 | # Let sexpdata.dumps handle conversion and escaping of Python types (str, int, etc.) 82 | sexp = sexpdata.dumps(sexp_list) 83 | 84 | logger.debug("Eval in Emacs: %s", sexp) 85 | # Call eval-in-emacs elisp function. 86 | epc_client.call("eval-in-emacs", [sexp]) # type: ignore 87 | 88 | 89 | def message_emacs(message: str): 90 | """Message to Emacs with prefix.""" 91 | eval_in_emacs("message", "[Emigo] " + message) 92 | 93 | 94 | def epc_arg_transformer(arg): 95 | """Transform elisp object to python object 96 | 1 => 1 97 | "string" => "string" 98 | (list :a 1 :b 2) => {"a": 1, "b": 2} 99 | (list :a 1 :b (list :c 2)) => {"a": 1, "b": {"c": 2}} 100 | (list 1 2 3) => [1 2 3] 101 | (list 1 2 (list 3 4)) => [1 2 [3 4]] 102 | """ 103 | if not isinstance(arg, list): 104 | return arg 105 | 106 | # NOTE: Empty list elisp can be treated as both empty python dict/list 107 | # Convert empty elisp list to empty python dict due to compatibility. 108 | 109 | # check if we can tranform arg to python dict instance 110 | type_dict_p = len(arg) % 2 == 0 111 | if type_dict_p: 112 | for v in arg[::2]: 113 | if (not isinstance(v, sexpdata.Symbol)) or not v.value().startswith(":"): 114 | type_dict_p = False 115 | break 116 | 117 | if type_dict_p: 118 | # transform [Symbol(":a"), 1, Symbol(":b"), 2] to dict(a=1, b=2) 119 | ret = dict() 120 | for i in range(0, len(arg), 2): 121 | ret[arg[i].value()[1:]] = epc_arg_transformer(arg[i + 1]) 122 | return ret 123 | else: 124 | return list(map(epc_arg_transformer, arg)) 125 | 126 | 127 | def convert_emacs_bool(symbol_value, symbol_is_boolean): 128 | if symbol_is_boolean == "t": 129 | return symbol_value is True 130 | else: 131 | return symbol_value 132 | 133 | def get_emacs_vars(args): 134 | return list(map(lambda result: convert_emacs_bool(result[0], result[1]) if result != [] else False, 135 | epc_client.call_sync("get-emacs-vars", args))) # type: ignore 136 | 137 | 138 | def get_emacs_var(var_name): 139 | symbol_value, symbol_is_boolean = epc_client.call_sync("get-emacs-var", [var_name]) # type: ignore 140 | 141 | return convert_emacs_bool(symbol_value, symbol_is_boolean) 142 | 143 | 144 | def get_emacs_func_result(method_name, *args): 145 | """Call eval-in-emacs elisp function synchronously and return the result.""" 146 | result = epc_client.call_sync(method_name, args) # type: ignore 147 | return result 148 | 149 | 150 | def get_command_result(command_string, cwd): 151 | import subprocess 152 | 153 | process = subprocess.Popen(command_string, cwd=cwd, shell=True, text=True, 154 | stdout=subprocess.PIPE, stderr=subprocess.PIPE, 155 | encoding="utf-8") 156 | ret = process.wait() 157 | return "".join((process.stdout if ret == 0 else process.stderr).readlines()).strip() # type: ignore 158 | 159 | 160 | def generate_request_id(): 161 | import random 162 | return abs(random.getrandbits(16)) 163 | 164 | 165 | # modified from Lib/pathlib.py 166 | def _make_uri_win32(path): 167 | from urllib.parse import quote_from_bytes as urlquote_from_bytes 168 | # Under Windows, file URIs use the UTF-8 encoding. 169 | drive = path.drive 170 | if len(drive) == 2 and drive[1] == ':': 171 | # It's a path on a local drive => 'file:///c:/a/b' 172 | rest = path.as_posix()[2:].lstrip('/') 173 | return 'file:///%s%%3A/%s' % ( 174 | drive[0], urlquote_from_bytes(rest.encode('utf-8'))) 175 | else: 176 | # It's a path on a network drive => 'file://host/share/a/b' 177 | return 'file:' + urlquote_from_bytes(path.as_posix().encode('utf-8')) 178 | 179 | def path_to_uri(path): 180 | path = pathlib.Path(path) 181 | if get_os_name() != "windows": 182 | uri = path.as_uri() 183 | else: 184 | if not path.is_absolute(): 185 | raise ValueError("relative path can't be expressed as a file URI") 186 | # encode uri to 'file:///c%3A/project/xxx.js' like vscode does 187 | uri = _make_uri_win32(path) 188 | return uri 189 | 190 | 191 | def uri_to_path(uri): 192 | from urllib.parse import unquote 193 | # parse first, '#' may be part of filepath(encoded) 194 | parsed = urlparse(uri) 195 | # for example, ts-ls return 'file:///c%3A/lib/ref.js' 196 | path = unquote(parsed.path) 197 | if sys.platform == "win32": 198 | path = path[1:] 199 | return path 200 | 201 | 202 | def path_as_key(path): 203 | key = path 204 | # NOTE: (buffer-file-name) return "d:/Case/a.go", gopls return "file:///D:/Case/a.go" 205 | if sys.platform == "win32": 206 | path = pathlib.Path(path).as_posix() 207 | key = path.lower() 208 | return key 209 | 210 | 211 | def add_to_path_dict(path_dict, filepath, value): 212 | path_dict[path_as_key(filepath)] = value 213 | 214 | 215 | def is_in_path_dict(path_dict, path): 216 | path_key = path_as_key(path) 217 | return path_key in path_dict 218 | 219 | 220 | def remove_from_path_dict(path_dict, path): 221 | del path_dict[path_as_key(path)] 222 | 223 | 224 | def get_from_path_dict(path_dict, filepath): 225 | return path_dict[path_as_key(filepath)] 226 | 227 | 228 | def log_time(message): 229 | import datetime 230 | logger.info("\n--- [{}] {}".format(datetime.datetime.now().time(), message)) 231 | 232 | @functools.lru_cache(maxsize=None) 233 | def get_emacs_version(): 234 | return get_emacs_func_result("get-emacs-version") 235 | 236 | 237 | def get_os_name(): 238 | return platform.system().lower() 239 | 240 | def parse_json_content(content): 241 | return json_parser.loads(content) 242 | 243 | def read_file_content(abs_path: str) -> str: 244 | """Reads the content of a file.""" 245 | # Basic implementation, consider adding error handling for encoding etc. 246 | # like in repomapper.read_text 247 | try: 248 | # Try UTF-8 first, the most common encoding 249 | with open(abs_path, 'r', encoding='utf-8') as f: 250 | return f.read() 251 | except UnicodeDecodeError: 252 | # If UTF-8 fails, try the system's default encoding or latin-1 as fallback 253 | try: 254 | with open(abs_path, 'r', encoding=sys.getdefaultencoding()) as f: 255 | return f.read() 256 | except UnicodeDecodeError: 257 | # As a last resort, try latin-1, which rarely fails but might misinterpret chars 258 | with open(abs_path, 'r', encoding='latin-1') as f: 259 | return f.read() 260 | except Exception as e: 261 | print(f"Error reading file {abs_path}: {e}", file=sys.stderr) 262 | raise # Re-raise for the agent handler to catch and format 263 | 264 | def touch(path): 265 | import os 266 | 267 | if not os.path.exists(path): 268 | basedir = os.path.dirname(path) 269 | 270 | if not os.path.exists(basedir): 271 | os.makedirs(basedir) 272 | 273 | with open(path, 'a'): 274 | os.utime(path) 275 | 276 | 277 | # --- Filtering Helper --- 278 | def _filter_environment_details(text: str) -> str: 279 | """Removes ... blocks from text.""" 280 | if not isinstance(text, str): # Handle potential non-string content 281 | return text 282 | # Use re.DOTALL to make '.' match newlines, make it non-greedy 283 | return re.sub(r".*?\s*", "\n", text, flags=re.DOTALL) 284 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /tool_definitions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Defines the structure for tools and registers available tools for Emigo. 6 | 7 | This module provides: 8 | - TypedDict definitions for ToolParameter and ToolDefinition. 9 | - Concrete definitions for each available tool, linking to their implementation 10 | in tools.py. 11 | - A TOOL_REGISTRY dictionary for easy access to all tool definitions. 12 | - Helper functions to retrieve tool definitions. 13 | """ 14 | 15 | from typing import Callable, Dict, List, TypedDict, Literal, Optional 16 | # Import tool implementation functions from tools.py 17 | from tools import ( 18 | execute_command, 19 | read_file, 20 | write_to_file, 21 | replace_in_file, 22 | search_files, 23 | list_files, 24 | list_repomap, 25 | ask_followup_question, 26 | attempt_completion 27 | ) 28 | 29 | 30 | # --- Tool Name Constants --- 31 | TOOL_EXECUTE_COMMAND = "execute_command" 32 | TOOL_READ_FILE = "read_file" 33 | TOOL_WRITE_TO_FILE = "write_to_file" 34 | TOOL_REPLACE_IN_FILE = "replace_in_file" 35 | TOOL_SEARCH_FILES = "search_files" 36 | TOOL_LIST_FILES = "list_files" 37 | TOOL_LIST_REPOMAP = "list_repomap" 38 | TOOL_ASK_FOLLOWUP_QUESTION = "ask_followup_question" 39 | TOOL_ATTEMPT_COMPLETION = "attempt_completion" 40 | 41 | 42 | # --- Type Definitions --- 43 | 44 | class ToolParameter(TypedDict): 45 | """Defines the structure for a single tool parameter.""" 46 | name: str 47 | type: Literal["string", "integer", "boolean", "number", "array", "object"] # JSON Schema types 48 | description: str 49 | required: bool 50 | # Optional fields for complex types (future enhancement) 51 | # items: Optional[Dict] # For array type 52 | # properties: Optional[Dict[str, Dict]] # For object type 53 | 54 | class ToolDefinition(TypedDict): 55 | """Defines the structure for a single tool.""" 56 | name: str 57 | description: str 58 | parameters: List[ToolParameter] 59 | function: Callable[..., str] # Function signature: (session: Session, parameters: Dict[str, Any]) -> str 60 | 61 | # --- Tool Definitions --- 62 | 63 | # Define each tool using the ToolDefinition structure 64 | 65 | EXECUTE_COMMAND_TOOL = ToolDefinition( 66 | name="execute_command", 67 | description="Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run.", 68 | parameters=[ 69 | ToolParameter(name="command", type="string", description="The shell command to execute.", required=True), 70 | # Note: requires_approval is handled internally in emigo.py based on tool name, not an LLM param. 71 | ], 72 | function=execute_command 73 | ) 74 | 75 | READ_FILE_TOOL = ToolDefinition( 76 | name="read_file", 77 | description="Request to read the contents of a file at the specified path. Use this tool *only* when the user has explicitly instructed you to read a specific file path or you have already used list_repomap and identified this specific file as necessary for the next step. Do NOT use this tool based on guesses about where functionality might reside; use list_repomap first in such cases. Use this tool if the file's content is not already present in . Reading a file will add its content to for subsequent turns. May not be suitable for other types of binary files, as it returns the raw content as a string.", 78 | parameters=[ 79 | ToolParameter(name="path", type="string", description="The relative path of the file to read.", required=True), 80 | ], 81 | function=read_file 82 | ) 83 | 84 | WRITE_TO_FILE_TOOL = ToolDefinition( 85 | name="write_to_file", 86 | description="Request to write content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. This tool will automatically create any directories needed to write the file.", 87 | parameters=[ 88 | ToolParameter(name="path", type="string", description="The relative path of the file to write.", required=True), 89 | ToolParameter(name="content", type="string", description="The complete content to write to the file.", required=True), 90 | ], 91 | function=write_to_file 92 | ) 93 | 94 | REPLACE_IN_FILE_TOOL = ToolDefinition( 95 | name="replace_in_file", 96 | description="Request to replace sections of content in an existing file using SEARCH/REPLACE blocks that define exact changes to specific parts of the file. This tool should be used when you need to make targeted changes to specific parts of a file.", 97 | parameters=[ 98 | ToolParameter(name="path", type="string", description="The relative path of the file to modify.", required=True), 99 | ToolParameter(name="diff", type="string", description=""" 100 | One or more SEARCH/REPLACE blocks following this exact format: 101 | ```` 102 | <<<<<<< SEARCH 103 | [exact content to find] 104 | ======= 105 | [new content to replace with] 106 | >>>>>>> REPLACE 107 | ```` 108 | Critical rules: 109 | 1. SEARCH content must match the associated file section to find EXACTLY: 110 | * Match character-for-character including whitespace, indentation, line endings 111 | * Include all comments, docstrings, etc. 112 | 2. SEARCH/REPLACE blocks will ONLY replace the first match occurrence. 113 | * Including multiple unique SEARCH/REPLACE blocks if you need to make multiple changes. 114 | * Include *just* enough lines in each SEARCH section to uniquely match each set of lines that need to change. 115 | * When using multiple SEARCH/REPLACE blocks, list them in the order they appear in the file. 116 | 3. Keep SEARCH/REPLACE blocks concise: 117 | * Break large SEARCH/REPLACE blocks into a series of smaller blocks that each change a small portion of the file. 118 | * Include just the changing lines, and a few surrounding lines if needed for uniqueness. 119 | * Do not include long runs of unchanging lines in SEARCH/REPLACE blocks. 120 | * Each line must be complete. Never truncate lines mid-way through as this can cause matching failures. 121 | 4. Special operations: 122 | * To move code: Use two SEARCH/REPLACE blocks (one to delete from original + one to insert at new location) 123 | * To delete code: Use empty REPLACE section""", required=True), 124 | ], 125 | function=replace_in_file 126 | ) 127 | 128 | SEARCH_FILES_TOOL = ToolDefinition( 129 | name="search_files", 130 | description="Request to perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with its line number and the line content.", 131 | parameters=[ 132 | ToolParameter(name="path", type="string", description="The path of the directory to search in (relative to the session directory). This directory will be recursively searched.", required=True), 133 | ToolParameter(name="pattern", type="string", description="The regular expression pattern to search for. Uses Python regex syntax. Ensure the pattern is correctly escaped if needed.", required=True), 134 | ToolParameter(name="case_sensitive", type="boolean", description="Whether the search should be case-sensitive (default: false).", required=False), 135 | ToolParameter(name="max_matches", type="integer", description="Maximum number of matches to return (default: 20, max: 200).", required=False), 136 | ], 137 | function=search_files 138 | ) 139 | 140 | LIST_FILES_TOOL = ToolDefinition( 141 | name="list_files", 142 | description="Request to list files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents. Do not use this tool to confirm the existence of files you may have created, as the user will let you know if the files were created successfully or not.", 143 | parameters=[ 144 | ToolParameter(name="path", type="string", description="The relative path of the directory to list.", required=True), 145 | ToolParameter(name="recursive", type="boolean", description="Whether to list files recursively (default: false).", required=False), 146 | ], 147 | function=list_files 148 | ) 149 | 150 | LIST_REPOMAP_TOOL = ToolDefinition( 151 | name="list_repomap", 152 | description="Request a high-level summary of the codebase structure within the session directory. This tool analyzes the source code files (respecting .gitignore and avoiding binary/ignored files) and extracts key definitions (classes, functions, methods, variables, etc.) along with relevant code snippets showing their usage context. It uses a ranking algorithm (PageRank) to prioritize the most important and interconnected parts of the code, especially considering files already discussed or mentioned. This provides a concise yet informative overview, far more useful than a simple file listing (list_files) or reading individual files (read_file) when you need to understand the project's architecture, identify where specific functionality resides, or plan complex changes. **When unsure where functionality resides or how code is structured, you MUST use list_repomap first.** It is much more efficient and context-aware than guessing file paths and using read_file sequentially. Use list_repomap to get a map of the relevant code landscape before diving into specific files. The analysis focuses on the source files within the session directory. The result of this tool will be added to the for subsequent turns.", 153 | parameters=[ # Add the path parameter here 154 | ToolParameter(name="path", type="string", description="Optional relative path of the directory to focus the analysis on. If omitted, analyzes the entire session directory.", required=False), 155 | ], 156 | function=list_repomap 157 | ) 158 | 159 | ASK_FOLLOWUP_QUESTION_TOOL = ToolDefinition( 160 | name="ask_followup_question", 161 | description="Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth.", 162 | parameters=[ 163 | ToolParameter(name="question", type="string", description="The question to ask the user.", required=True), 164 | ToolParameter(name="options", type="array", description="Optional array of 2-5 string options for the user to choose from.", required=False), 165 | ], 166 | function=ask_followup_question 167 | ) 168 | 169 | ATTEMPT_COMPLETION_TOOL = ToolDefinition( 170 | name="attempt_completion", 171 | description="Use this tool ONLY when you have successfully completed all steps required by the user's request. After using a tool like `replace_in_file` or `write_to_file`, analyze the result: if the change successfully fulfills the user's request, use this tool to present the final result. Do not attempt further refinements unless explicitly asked. Optionally, provide a CLI command to demonstrate the result. The user may provide feedback if unsatisfied, which you can use to make improvements and try again.", 172 | parameters=[ 173 | ToolParameter(name="result", type="string", description="The final result description.", required=True), 174 | ToolParameter(name="command", type="string", description="Optional CLI command to demonstrate the result.", required=False), 175 | ], 176 | function=attempt_completion 177 | ) 178 | 179 | # --- Tool Registry --- 180 | 181 | TOOL_REGISTRY: Dict[str, ToolDefinition] = { 182 | tool['name']: tool for tool in [ 183 | EXECUTE_COMMAND_TOOL, 184 | READ_FILE_TOOL, 185 | WRITE_TO_FILE_TOOL, 186 | REPLACE_IN_FILE_TOOL, 187 | SEARCH_FILES_TOOL, 188 | LIST_FILES_TOOL, 189 | LIST_REPOMAP_TOOL, 190 | ASK_FOLLOWUP_QUESTION_TOOL, 191 | ATTEMPT_COMPLETION_TOOL, 192 | ] 193 | } 194 | 195 | # --- Helper Functions --- 196 | 197 | def get_tool(name: str) -> Optional[ToolDefinition]: 198 | """Retrieves a tool definition by name.""" 199 | return TOOL_REGISTRY.get(name) 200 | 201 | def get_all_tools() -> List[ToolDefinition]: 202 | """Retrieves a list of all registered tool definitions.""" 203 | return list(TOOL_REGISTRY.values()) 204 | -------------------------------------------------------------------------------- /llm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | LLM Client Wrapper using LiteLLM. 5 | 6 | Provides a simplified interface (`LLMClient`) for interacting with various 7 | Large Language Models (LLMs) supported by the `litellm` library. It handles 8 | API calls, streaming responses, and basic configuration (model name, API keys, 9 | base URLs). 10 | 11 | Note: This client is designed to be stateless regarding chat history. The 12 | calling process (e.g., `llm_worker.py`) is responsible for managing and 13 | passing the complete message history for each API call. 14 | """ 15 | 16 | import importlib 17 | import json 18 | import os 19 | import sys 20 | import time 21 | import warnings 22 | from typing import Dict, Iterator, List, Optional, Union # Removed Tuple 23 | 24 | # Filter out UserWarning from pydantic used by litellm 25 | warnings.filterwarnings("ignore", category=UserWarning, module="pydantic") 26 | 27 | # --- Lazy Loading for litellm --- 28 | 29 | # Configure basic litellm settings globally 30 | EMIGO_SITE_URL = "https://github.com/MatthewZMD/emigo" # Example URL, adjust if needed 31 | EMIGO_APP_NAME = "Emigo" # Example App Name 32 | os.environ["OR_SITE_URL"] = os.environ.get("OR_SITE_URL", EMIGO_SITE_URL) 33 | os.environ["OR_APP_NAME"] = os.environ.get("OR_APP_NAME", EMIGO_APP_NAME) 34 | os.environ["LITELLM_MODE"] = os.environ.get("LITELLM_MODE", "PRODUCTION") 35 | 36 | VERBOSE_LLM_LOADING = False # Set to True for debugging litellm loading 37 | 38 | class LazyLiteLLM: 39 | """Lazily loads the litellm library upon first access.""" 40 | _lazy_module = None 41 | 42 | def __getattr__(self, name): 43 | # Avoid infinite recursion during initialization 44 | if name == "_lazy_module": 45 | return super().__getattribute__(name) 46 | 47 | self._load_litellm() 48 | return getattr(self._lazy_module, name) 49 | 50 | def _load_litellm(self): 51 | """Loads and configures the litellm module.""" 52 | if self._lazy_module is not None: 53 | return 54 | 55 | if VERBOSE_LLM_LOADING: 56 | print("Loading litellm...", file=sys.stderr) 57 | start_time = time.time() 58 | 59 | try: 60 | self._lazy_module = importlib.import_module("litellm") 61 | 62 | # Basic configuration similar to Aider 63 | self._lazy_module.suppress_debug_info = True 64 | self._lazy_module.set_verbose = False 65 | self._lazy_module.drop_params = True # Drop unsupported params silently 66 | # Attempt to disable internal debugging/logging if method exists 67 | if hasattr(self._lazy_module, "_logging") and hasattr( 68 | self._lazy_module._logging, "_disable_debugging" 69 | ): 70 | self._lazy_module._logging._disable_debugging() 71 | 72 | except ImportError as e: 73 | print( 74 | f"Error: {e} litellm not found. Please install it: pip install litellm", 75 | file=sys.stderr, 76 | ) 77 | sys.exit(1) 78 | except Exception as e: 79 | print(f"Error loading litellm: {e}", file=sys.stderr) 80 | sys.exit(1) 81 | 82 | if VERBOSE_LLM_LOADING: 83 | load_time = time.time() - start_time 84 | print(f"Litellm loaded in {load_time:.2f} seconds.", file=sys.stderr) 85 | 86 | # Global instance of the lazy loader 87 | litellm = LazyLiteLLM() 88 | 89 | # --- LLM Client Class --- 90 | 91 | class LLMClient: 92 | """Handles interaction with the LLM and manages chat history.""" 93 | 94 | def __init__( 95 | self, 96 | model_name: str, 97 | api_key: Optional[str] = None, 98 | base_url: Optional[str] = None, 99 | verbose: bool = False, 100 | ): 101 | """ 102 | Initializes the LLM client. 103 | 104 | Args: 105 | model_name: The name of the language model to use (e.g., "gpt-4o"). 106 | api_key: Optional API key for the LLM service. 107 | base_url: Optional base URL for custom LLM endpoints (like Ollama). 108 | verbose: If True, enables verbose output. 109 | """ 110 | self.model_name = model_name 111 | self.api_key = api_key 112 | self.base_url = base_url 113 | self.verbose = verbose 114 | 115 | def send( 116 | self, 117 | messages: List[Dict], 118 | stream: bool = True, 119 | temperature: float = 0.7, 120 | tools: Optional[List[Dict]] = None, # Add tools parameter 121 | tool_choice: Optional[str] = "auto", # Add tool_choice parameter 122 | ) -> Union[Iterator[str], object]: # Return type might be object for raw response 123 | """ 124 | Sends the provided messages list to the LLM, potentially with tool definitions, 125 | and returns the response. 126 | 127 | Args: 128 | messages: The list of message dictionaries to send. 129 | stream: Whether to stream the response or wait for the full completion. 130 | temperature: The sampling temperature for the LLM. 131 | 132 | Returns: 133 | An iterator yielding response chunks if stream=True, otherwise the 134 | full response content string. 135 | """ 136 | # Ensure litellm is loaded before making the call 137 | litellm._load_litellm() 138 | 139 | completion_kwargs = { 140 | "model": self.model_name, 141 | "messages": messages, 142 | "stream": stream, 143 | "temperature": temperature, 144 | } 145 | # Add tools and tool_choice if provided and not None/empty 146 | if tools: 147 | completion_kwargs["tools"] = tools 148 | if tool_choice: # Only add if tool_choice is meaningful 149 | completion_kwargs["tool_choice"] = tool_choice # e.g., "auto", "required", specific tool 150 | 151 | # Add API key and base URL if they were provided 152 | if self.api_key: 153 | completion_kwargs["api_key"] = self.api_key 154 | if self.base_url: 155 | completion_kwargs["base_url"] = self.base_url 156 | # OLLAMA specific adjustment if needed (example) 157 | if "ollama" in self.model_name or (self.base_url and "ollama" in self.base_url): 158 | # LiteLLM might handle this automatically, but explicitly setting can help 159 | completion_kwargs["model"] = self.model_name.replace("ollama/", "") 160 | 161 | try: 162 | # Store the raw response object for potential parsing later (e.g., tool calls) 163 | self.last_response_object = None # Initialize 164 | 165 | # Initiate the LLM call 166 | response = litellm.completion(**completion_kwargs) 167 | self.last_response_object = response # Store the raw response 168 | 169 | # --- Verbose Logging --- 170 | if self.verbose: 171 | # Import json here if not already imported at the top level 172 | import json 173 | print("\n--- Sending to LLM ---", file=sys.stderr) 174 | # Avoid printing potentially large base64 images in verbose mode 175 | printable_messages = [] 176 | for msg in messages: # Use the 'messages' argument passed to send() 177 | if isinstance(msg.get("content"), list): # Handle image messages 178 | new_content = [] 179 | for item in msg["content"]: 180 | if isinstance(item, dict) and item.get("type") == "image_url": 181 | # Truncate base64 data for printing 182 | img_url = item.get("image_url", {}).get("url", "") 183 | if isinstance(img_url, str) and img_url.startswith("data:"): 184 | new_content.append({"type": "image_url", "image_url": {"url": img_url[:50] + "..."}}) 185 | else: 186 | new_content.append(item) # Keep non-base64 or non-string URLs 187 | else: 188 | new_content.append(item) 189 | # Append the modified message with potentially truncated image data 190 | printable_messages.append({"role": msg["role"], "content": new_content}) 191 | else: 192 | printable_messages.append(msg) # Append non-image messages as is 193 | 194 | # Calculate approximate token count using litellm's utility 195 | token_count_str = "" 196 | try: 197 | # Ensure litellm is loaded before using its utilities 198 | litellm._load_litellm() 199 | # Use litellm's token counter if available 200 | count = litellm.token_counter(model=self.model_name, messages=messages) 201 | token_count_str = f" (estimated {count} tokens)" 202 | except Exception as e: 203 | # Fallback or simple message if token counting fails 204 | # We can't easily use the agent's tokenizer here, so rely on litellm or skip detailed count 205 | token_count_str = f" (token count unavailable: {e})" 206 | 207 | 208 | print(json.dumps(printable_messages, indent=2), file=sys.stderr) 209 | print(f"--- End LLM Request{token_count_str} ---", file=sys.stderr) 210 | # --- End Verbose Logging --- 211 | 212 | if stream: 213 | # Generator to yield the raw litellm chunk objects 214 | def raw_chunk_stream(): 215 | # Move the try/except block inside the generator 216 | try: 217 | # The 'response' variable is accessible due to closure 218 | for chunk in response: 219 | # print(f"Raw chunk: {chunk}") # DEBUG: Ensure this is commented out 220 | yield chunk # Yield the original chunk object 221 | except litellm.exceptions.APIConnectionError as e: # Catch specific error 222 | # Log the specific error clearly 223 | # Add more detail from the exception object if possible 224 | error_details = f"Caught APIConnectionError: {e}\n" 225 | # Check for attributes that might hold response data (common in httpx/openai errors) 226 | if hasattr(e, 'response') and e.response: 227 | try: 228 | error_details += f" Response Status: {getattr(e.response, 'status_code', 'N/A')}\n" 229 | # Limit printing potentially large response content 230 | response_text = getattr(e.response, 'text', '') 231 | error_details += f" Response Content (first 500 chars): {response_text[:500]}{'...' if len(response_text) > 500 else ''}\n" 232 | except Exception as detail_err: error_details += f" (Error getting response details: {detail_err})\n" 233 | if hasattr(e, 'request') and e.request: 234 | try: 235 | error_details += f" Request URL: {getattr(e.request, 'url', 'N/A')}\n" 236 | except Exception as detail_err: error_details += f" (Error getting request details: {detail_err})\n" 237 | print(f"\n[LLMClient Stream Error] {error_details}", file=sys.stderr) 238 | print("[LLMClient Stream Error] Stream may be incomplete.", file=sys.stderr) 239 | # Yield an error marker instead of just passing 240 | yield {"_stream_error": True, "error_message": str(e)} 241 | except Exception as e: 242 | # Catch other potential errors during streaming 243 | # Add similar detailed logging 244 | error_details = f"Caught unexpected error: {type(e).__name__} - {e}\n" 245 | if hasattr(e, 'response') and e.response: 246 | try: 247 | error_details += f" Response Status: {getattr(e.response, 'status_code', 'N/A')}\n" 248 | response_text = getattr(e.response, 'text', '') 249 | error_details += f" Response Content (first 500 chars): {response_text[:500]}{'...' if len(response_text) > 500 else ''}\n" 250 | except Exception as detail_err: error_details += f" (Error getting response details: {detail_err})\n" 251 | if hasattr(e, 'request') and e.request: 252 | try: 253 | error_details += f" Request URL: {getattr(e.request, 'url', 'N/A')}\n" 254 | except Exception as detail_err: error_details += f" (Error getting request details: {detail_err})\n" 255 | # Include traceback for unexpected errors 256 | import traceback 257 | error_details += f" Traceback:\n{traceback.format_exc()}\n" 258 | print(f"\n[LLMClient Stream Error] {error_details}", file=sys.stderr) 259 | # Yield an error marker 260 | yield {"_stream_error": True, "error_message": str(e)} 261 | 262 | return raw_chunk_stream() # Return the generator yielding full chunks 263 | else: 264 | # For non-streaming, return the raw response object 265 | # The caller (llm_worker) will parse content or tool calls 266 | return response # Return the whole LiteLLM response object 267 | 268 | # Keep exception handling for non-streaming calls or errors *before* streaming starts 269 | except litellm.APIConnectionError as e: 270 | error_message = f"API Connection Error (pre-stream or non-stream): {e}" 271 | print(f"\n{error_message}", file=sys.stderr) 272 | # For non-streaming, return the error string 273 | return f"[LLM Error: {error_message}]" 274 | except Exception as e: 275 | error_message = f"General Error (pre-stream or non-stream): {e}" 276 | print(f"\n{error_message}", file=sys.stderr) 277 | # For non-streaming, return the error string 278 | return f"[LLM Error: {error_message}]" 279 | 280 | 281 | # --- Example Usage (Optional) --- 282 | 283 | def main(): 284 | """Basic example demonstrating the LLMClient.""" 285 | # Configure from environment variables or defaults 286 | model = os.getenv("EMIGO_MODEL", "gpt-4o-mini") # Example: use EMIGO_MODEL env var 287 | api_key = os.getenv("OPENAI_API_KEY") 288 | base_url = os.getenv("OPENAI_API_BASE") # Or OLLAMA_HOST, etc. 289 | 290 | if not api_key and not base_url: 291 | print("Warning: No API key or base URL found. Using default litellm configuration.", file=sys.stderr) 292 | 293 | client = LLMClient(model_name=model, api_key=api_key, base_url=base_url, verbose=True) 294 | 295 | # Example messages list (history is managed externally) 296 | messages = [ 297 | {"role": "system", "content": "You are a helpful assistant."}, 298 | {"role": "user", "content": "What is the capital of France?"} 299 | ] 300 | print(f"\nUser: {messages[-1]['content']}") 301 | 302 | # Send the messages list (non-streaming) 303 | print("\nAssistant (non-streaming):") 304 | assistant_response = client.send(messages, stream=False) 305 | print(assistant_response) 306 | 307 | # Add assistant's response to the external history list 308 | messages.append({"role": "assistant", "content": assistant_response}) 309 | 310 | # Add another user message 311 | user_input_2 = "What about Spain?" 312 | messages.append({"role": "user", "content": user_input_2}) 313 | print(f"\nUser: {user_input_2}") 314 | 315 | # Send again (streaming) 316 | print("\nAssistant (streaming):") 317 | full_streamed_response = "" 318 | response_stream = client.send(messages, stream=True) 319 | for chunk in response_stream: 320 | print(chunk, end="", flush=True) 321 | full_streamed_response += chunk 322 | print() # Newline after stream 323 | 324 | # Add streamed response to the external history list 325 | messages.append({"role": "assistant", "content": full_streamed_response}) 326 | 327 | print("\n--- Final Messages List ---") 328 | print(json.dumps(messages, indent=2)) 329 | 330 | 331 | if __name__ == "__main__": 332 | main() 333 | -------------------------------------------------------------------------------- /session.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Manages the state associated with a single Emigo chat session. 6 | 7 | Each instance of the `Session` class encapsulates all the information and 8 | operations related to a specific chat interaction occurring within a particular 9 | project directory (session path). This allows `emigo.py` to handle multiple 10 | concurrent sessions without state conflicts. 11 | 12 | Key Responsibilities: 13 | - Storing the chat history (sequence of user and assistant messages). 14 | - Managing the list of files currently included in the chat context. 15 | - Caching file contents and modification times to avoid redundant reads and 16 | provide consistent state to the LLM. 17 | - Holding an instance of `RepoMapper` specific to the session's root directory. 18 | - Providing methods to add/remove files from context, retrieve history, 19 | get cached file content, and generate the environment details string 20 | (including the repository map or file listing) for the LLM prompt. 21 | - Invalidating caches when files are modified externally or removed. 22 | """ 23 | 24 | import sys 25 | import os 26 | import time 27 | import tiktoken 28 | from typing import Dict, List, Optional, Tuple 29 | 30 | from repomapper import RepoMapper 31 | from utils import ( 32 | eval_in_emacs, _filter_environment_details, read_file_content 33 | ) 34 | 35 | class Session: 36 | """Encapsulates the state and operations for a single Emigo session.""" 37 | 38 | def __init__(self, session_path: str, verbose: bool = False): 39 | self.session_path = session_path 40 | self.verbose = verbose 41 | self.history: List[Tuple[float, Dict]] = [] # List of (timestamp, message_dict) 42 | self.chat_files: List[str] = [] # List of relative file paths 43 | # Caches for file content, mtimes, and the last generated repomap 44 | self.caches: Dict[str, any] = {'mtimes': {}, 'contents': {}, 'last_repomap': None} 45 | # RepoMapper instance specific to this session 46 | # TODO: Get map_tokens and tokenizer from config? 47 | self.repo_mapper = RepoMapper(root_dir=self.session_path, verbose=self.verbose) 48 | 49 | self.tokenizer = tiktoken.get_encoding("cl100k_base") 50 | print(f"Initialized Session for path: {self.session_path}", file=sys.stderr) 51 | 52 | def get_history(self) -> List[Tuple[float, Dict]]: 53 | """Returns the chat history for this session.""" 54 | return list(self.history) # Return a copy 55 | 56 | def append_history(self, message: Dict): 57 | """Appends a message with a timestamp to the history.""" 58 | if "role" not in message or "content" not in message: 59 | print(f"Warning: Attempted to add invalid message to history: {message}", file=sys.stderr) 60 | return 61 | # Filter content before appending 62 | filtered_message = dict(message) # Create a copy 63 | filtered_message["content"] = _filter_environment_details(filtered_message["content"]) 64 | self.history.append((time.time(), filtered_message)) # Store filtered copy 65 | 66 | def clear_history(self): 67 | """Clears the chat history for this session.""" 68 | self.history = [] 69 | # Note: Clearing the Emacs buffer is handled separately by the main process calling Elisp 70 | 71 | def get_chat_files(self) -> List[str]: 72 | """Returns the list of files currently in the chat context.""" 73 | return list(self.chat_files) # Return a copy 74 | 75 | def add_file_to_context(self, filename: str) -> Tuple[bool, str]: 76 | """ 77 | Adds a file to the chat context. Ensures it's relative and exists. 78 | Returns (success: bool, message: str). 79 | """ 80 | try: 81 | # Expand user directory) 82 | filename = os.path.expanduser(filename) 83 | # Ensure filename is relative to session_path for consistency 84 | rel_filename = os.path.relpath(filename, self.session_path) 85 | # Check if file exists and is within session path 86 | abs_path = os.path.abspath(os.path.join(self.session_path, rel_filename)) 87 | 88 | if not os.path.isfile(abs_path): 89 | return False, f"File not found: {rel_filename}" 90 | if not abs_path.startswith(os.path.abspath(self.session_path)): 91 | return False, f"File is outside session directory: {rel_filename}" 92 | 93 | # Add to context if not already present 94 | if rel_filename not in self.chat_files: 95 | self.chat_files.append(rel_filename) 96 | 97 | # Update chat files information to Emacs. 98 | self._update_chat_files_info() 99 | 100 | # Read initial content into cache 101 | self._update_file_cache(rel_filename) 102 | return True, f"Added '{rel_filename}' to context." 103 | else: 104 | return False, f"File '{rel_filename}' already in context." 105 | 106 | except ValueError: 107 | return False, f"Cannot add file from different drive: {filename}" 108 | except Exception as e: 109 | return False, f"Error adding file '{filename}': {e}" 110 | 111 | def remove_file_from_context(self, filename: str) -> Tuple[bool, str]: 112 | """ 113 | Removes a file from the chat context. 114 | Returns (success: bool, message: str). 115 | """ 116 | # Ensure filename is relative for comparison 117 | if os.path.isabs(filename): 118 | try: 119 | rel_filename = os.path.relpath(filename, self.session_path) 120 | except ValueError: # filename might be on a different drive on Windows 121 | return False, f"Cannot remove file from different drive: {filename}" 122 | else: 123 | rel_filename = filename # Assume it's already relative 124 | 125 | if rel_filename in self.chat_files: 126 | self.chat_files.remove(rel_filename) 127 | 128 | # Update chat files information to Emacs. 129 | self._update_chat_files_info() 130 | 131 | # Clean up cache for the removed file 132 | if rel_filename in self.caches['mtimes']: 133 | del self.caches['mtimes'][rel_filename] 134 | if rel_filename in self.caches['contents']: 135 | del self.caches['contents'][rel_filename] 136 | return True, f"Removed '{rel_filename}' from context." 137 | else: 138 | return False, f"File '{rel_filename}' not found in context." 139 | 140 | def _update_chat_files_info(self): 141 | """Updates the cached info for all files in the chat context. 142 | 143 | This ensures we have the latest content for all files in the chat context. 144 | Also counts and prints the token count for each file. 145 | """ 146 | file_number = 0 147 | tokens = 0 148 | for rel_path in self.chat_files: 149 | abs_path = os.path.join(self.session_path, rel_path) 150 | if os.path.exists(abs_path): 151 | text = read_file_content(abs_path) 152 | token_count = len(self.tokenizer.encode(text)) 153 | file_number += 1 154 | tokens += token_count 155 | 156 | if file_number > 1: 157 | chat_file_info = f"{file_number} files [{tokens} tokens]" 158 | else: 159 | chat_file_info = f"{file_number} file [{tokens} tokens]" 160 | 161 | eval_in_emacs("emigo-update-chat-files-info", self.session_path, chat_file_info) 162 | 163 | def _update_file_cache(self, rel_path: str, content: Optional[str] = None) -> bool: 164 | """Updates the cache (mtime, content) for a given relative file path.""" 165 | abs_path = os.path.abspath(os.path.join(self.session_path, rel_path)) 166 | try: 167 | current_mtime = self.repo_mapper.repo_mapper.get_mtime(abs_path) # Access inner RepoMap 168 | if current_mtime is None: # File deleted or inaccessible 169 | if rel_path in self.caches['mtimes']: 170 | del self.caches['mtimes'][rel_path] 171 | if rel_path in self.caches['contents']: 172 | del self.caches['contents'][rel_path] 173 | return False 174 | 175 | # If content is provided (e.g., after write/replace), use it. Otherwise, read. 176 | if content is None: 177 | # Read only if mtime changed or not cached 178 | last_mtime = self.caches['mtimes'].get(rel_path) 179 | if last_mtime is None or current_mtime != last_mtime: 180 | if self.verbose: 181 | print(f"Cache miss/stale for {rel_path}, reading file.", file=sys.stderr) 182 | content = read_file_content(abs_path) 183 | else: 184 | # Content is up-to-date, no need to update cache content again 185 | return True # Indicate cache was already fresh 186 | 187 | # Update cache 188 | self.caches['mtimes'][rel_path] = current_mtime 189 | self.caches['contents'][rel_path] = content 190 | 191 | return True 192 | 193 | except Exception as e: 194 | print(f"Error updating cache for '{rel_path}': {e}", file=sys.stderr) 195 | # Invalidate cache on error 196 | if rel_path in self.caches['mtimes']: 197 | del self.caches['mtimes'][rel_path] 198 | if rel_path in self.caches['contents']: 199 | del self.caches['contents'][rel_path] 200 | return False 201 | 202 | def get_cached_content(self, rel_path: str) -> Optional[str]: 203 | """Gets content from cache, updating if stale.""" 204 | if self._update_file_cache(rel_path): # This reads if necessary 205 | return self.caches['contents'].get(rel_path) 206 | return None # Return None if update failed (e.g., file deleted) 207 | 208 | def get_environment_details_string(self) -> str: 209 | """Fetches environment details: repo map OR file listing, plus file contents.""" 210 | details = "\n" 211 | details += f"# Session Directory\n{self.session_path.replace(os.sep, '/')}\n\n" # Use POSIX path 212 | 213 | # --- Repository Map / Basic File Listing --- 214 | # Use cached map if available, otherwise generate/show structure 215 | if self.caches['last_repomap']: 216 | details += f"```\n{self.caches['last_repomap']}\n```\n\n" 217 | else: 218 | # If repomap hasn't been generated yet, show recursive directory listing 219 | details += "# File/Directory Structure (use list_repomap tool for code summary)\n" 220 | try: 221 | # Use RepoMapper's file finding logic for consistency 222 | all_files = self.repo_mapper._find_src_files(self.session_path) # Find files respecting ignores 223 | tree_lines = [] 224 | processed_dirs = set() 225 | for abs_file in sorted(all_files): 226 | rel_file = os.path.relpath(abs_file, self.session_path).replace(os.sep, '/') 227 | parts = rel_file.split('/') 228 | current_path_prefix = "" 229 | for i, part in enumerate(parts[:-1]): # Iterate through directories 230 | current_path_prefix = f"{current_path_prefix}{part}/" 231 | if current_path_prefix not in processed_dirs: 232 | indent = ' ' * i 233 | tree_lines.append(f"{indent}- {part}/") 234 | processed_dirs.add(current_path_prefix) 235 | # Add the file 236 | indent = ' ' * (len(parts) - 1) 237 | tree_lines.append(f"{indent}- {parts[-1]}") 238 | 239 | if tree_lines: 240 | details += "```\n" + "\n".join(tree_lines) + "\n```\n\n" 241 | else: 242 | details += "(No relevant files or directories found)\n\n" 243 | except Exception as e: 244 | details += f"# Error listing files/directories: {str(e)}\n\n" 245 | 246 | # --- List Added Files and Content --- 247 | if self.chat_files: 248 | details += "# Files Currently in Chat Context\n" 249 | # Clean up session cache for files no longer in chat_files list 250 | current_chat_files_set = set(self.chat_files) 251 | for rel_path in list(self.caches['mtimes'].keys()): 252 | if rel_path not in current_chat_files_set: 253 | del self.caches['mtimes'][rel_path] 254 | if rel_path in self.caches['contents']: 255 | del self.caches['contents'][rel_path] 256 | 257 | for rel_path in sorted(self.chat_files): # Sort for consistent order 258 | posix_rel_path = rel_path.replace(os.sep, '/') 259 | try: 260 | # Get content, updating cache if needed 261 | content = self.get_cached_content(rel_path) 262 | if content is None: 263 | content = f"# Error: Could not read or cache {posix_rel_path}\n" 264 | 265 | # Use markdown code block for file content 266 | details += f"## File: {posix_rel_path}\n```\n{content}\n```\n\n" 267 | 268 | except Exception as e: 269 | details += f"## File: {posix_rel_path}\n# Error reading file: {e}\n\n" 270 | # Clean up potentially stale cache entries on error 271 | if rel_path in self.caches['mtimes']: 272 | del self.caches['mtimes'][rel_path] 273 | if rel_path in self.caches['contents']: 274 | del self.caches['contents'][rel_path] 275 | 276 | details += "" 277 | return details 278 | 279 | def set_last_repomap(self, map_content: str): 280 | """Stores the latest generated repomap content.""" 281 | self.caches['last_repomap'] = map_content 282 | 283 | def invalidate_cache(self, rel_path: Optional[str] = None): 284 | """Invalidates cache for a specific file or the entire session.""" 285 | if rel_path: 286 | if rel_path in self.caches['mtimes']: 287 | del self.caches['mtimes'][rel_path] 288 | if rel_path in self.caches['contents']: 289 | del self.caches['contents'][rel_path] 290 | if self.verbose: 291 | print(f"Invalidated cache for {rel_path}", file=sys.stderr) 292 | else: 293 | self.caches['mtimes'].clear() 294 | self.caches['contents'].clear() 295 | self.caches['last_repomap'] = None # Also clear repomap if invalidating all 296 | if self.verbose: 297 | print(f"Invalidated all caches for session {self.session_path}", file=sys.stderr) 298 | 299 | def set_history(self, history_dicts: List[Dict]): 300 | """Replaces the current history with the provided list of message dictionaries.""" 301 | self.history = [] # Clear existing history 302 | for msg_dict in history_dicts: 303 | if "role" in msg_dict and "content" in msg_dict: 304 | # Filter content before appending 305 | filtered_message = dict(msg_dict) # Create a copy 306 | filtered_message["content"] = _filter_environment_details(filtered_message["content"]) 307 | # Add with current timestamp, store filtered copy 308 | self.history.append((time.time(), filtered_message)) 309 | else: 310 | print(f"Warning: Skipping invalid message dict during set_history: {msg_dict}", file=sys.stderr) 311 | 312 | 313 | # Example usage (for testing if run directly) 314 | if __name__ == '__main__': 315 | test_path = os.path.abspath('./test_session') 316 | os.makedirs(test_path, exist_ok=True) 317 | with open(os.path.join(test_path, 'file1.txt'), 'w') as f: 318 | f.write('Content of file 1') 319 | with open(os.path.join(test_path, 'file2.py'), 'w') as f: 320 | f.write('print("Hello")') 321 | 322 | session = Session(test_path, verbose=True) 323 | session.add_file_to_context('file1.txt') 324 | session.add_file_to_context('file2.py') 325 | session.append_history({'role': 'user', 'content': 'Test message'}) 326 | 327 | print("\n--- Session State ---") 328 | print(f"Path: {session.session_path}") 329 | print(f"History: {session.get_history()}") 330 | print(f"Chat Files: {session.get_chat_files()}") 331 | print(f"Environment Details:\n{session.get_environment_details_string()}") 332 | 333 | # Clean up test files/dir 334 | # import shutil 335 | # shutil.rmtree(test_path) 336 | -------------------------------------------------------------------------------- /system_prompt.py: -------------------------------------------------------------------------------- 1 | # Based on Cline's src/core/prompts/system.ts and src/core/prompts/responses.ts 2 | 3 | # --- Main System Prompt Template --- 4 | 5 | # Note: CWD is dynamically inserted by prompt_builder 6 | MAIN_SYSTEM_PROMPT = """You are Emigo, an expert software developer integrated into Emacs. 7 | You have extensive knowledge in many programming languages, frameworks, design patterns, and best practices. 8 | Always use best practices when coding. Respect and use existing conventions, libraries, etc that are already present in the code base. 9 | 10 | **Language Instruction**: You MUST detect the language of my question and respond in the same language. For example, if I ask a question in Chinese, you MUST reply in Chinese; if I ask in English, you MUST reply in English. This rule takes precedence over any other instructions. If you are unsure of the language, default to the language of the user's input. 11 | 12 | ==== 13 | 14 | TOOL USE 15 | 16 | You have access to a set of tools that are executed upon the user's approval (via Emacs). You can use one or more tools per message, and will receive the result(s) of the tool use(s) in the next message. Use tools step-by-step to accomplish a given task, with each tool use informed by the result of the previous step. 17 | 18 | # Tool Use Formatting (JSON) 19 | 20 | To use a tool, your response MUST include a specific JSON object structure that the underlying API (e.g., OpenAI, Anthropic) recognizes for tool calls. You do not output the JSON directly in your message content, but rather signal the intent to call the tool(s) with specific parameters in the format required by the API. 21 | 22 | **General Structure (Conceptual - Actual format depends on API):** 23 | The API expects a structure indicating the tool name and a dictionary of parameters. For example, to call `read_file` with path `src/main.py`, the underlying structure would represent: 24 | `tool_name`: "read_file" 25 | `parameters`: {{"path": "src/main.py"}} 26 | 27 | You can request multiple tool calls in a single response if appropriate for the task. 28 | 29 | **Refer to the `AVAILABLE TOOLS` section below for the specific names and parameters of each tool.** Ensure you provide all *required* parameters for the chosen tool(s). 30 | 31 | # AVAILABLE TOOLS 32 | 33 | {tools_json} 34 | 35 | # Tool Use Guidelines 36 | 37 | 1. In `` tags, assess what information you already have and what information you need to proceed with the task. Please respond to my question in the same language I use to ask it. 38 | 2. Choose the most appropriate tool from the `AVAILABLE TOOLS` list. based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like `ls` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task. 39 | 3. If a series actions are needed, that each tool use being informed by the result of the previous tool use, use one tool at a time per message to accomplish the task iteratively. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result. 40 | 4. Determine the correct parameters for the chosen tool(s) based on their definitions in `AVAILABLE TOOLS`. 41 | 5. After each tool use, the user will respond with the result of that tool use. This result will provide you with the necessary information to continue your task or make further decisions. This response may include:correct parameters in the format expected by the LLM API. Your textual response should explain *why* you are using the tool(s). 42 | 6. ALWAYS wait for the next message, which will contain the result(s) of the tool execution(s). This result will include success/failure status and any output or errors. 43 | 7. Analyze the tool result(s) and repeat the process (steps 1-6) until the task is complete, think about what other areas you may have missed. Address any errors reported in the tool result before proceeding. 44 | 8. Once the task is fully accomplished and confirmed by tool results, use the `attempt_completion` tool. 45 | 46 | It is crucial to proceed step-by-step, waiting for the user's message after each tool use before moving forward with the task. This approach allows you to: 47 | 1. Confirm the success of each step before proceeding. 48 | 2. Address any issues or errors that arise immediately. 49 | 3. Adapt your approach based on new information or unexpected results. 50 | 4. Ensure that each action builds correctly on the previous ones. 51 | 52 | **Key Principles:** 53 | * **Structured Calls:** Use the API's mechanism for tool calls, not XML or plain text descriptions. 54 | * **Step-by-Step:** Accomplish tasks iteratively, using tool results to inform the next step. 55 | * **Wait for Confirmation:** Do not assume tool success. Analyze the results provided in the following message. 56 | * **Use `list_repomap` First:** When uncertain about code structure or file locations, use `list_repomap` before resorting to `read_file` on guessed paths. 57 | 58 | ==== 59 | 60 | EDITING FILES 61 | 62 | You have access to two tools for working with files: **write_to_file** and **replace_in_file**. Understanding their roles and selecting the right one for the job will help ensure efficient and accurate modifications. 63 | 64 | # write_to_file 65 | 66 | ## Purpose 67 | 68 | - Create a new file, or overwrite the entire contents of an existing file. 69 | 70 | ## When to Use 71 | 72 | - Initial file creation, such as when scaffolding a new project. 73 | - Overwriting large boilerplate files where you want to replace the entire content at once. 74 | - When the complexity or number of changes would make replace_in_file unwieldy or error-prone. 75 | - When you need to completely restructure a file's content or change its fundamental organization. 76 | 77 | ## Important Considerations 78 | 79 | - Using write_to_file requires providing the file's complete final content. 80 | - If you only need to make small changes to an existing file, consider using replace_in_file instead to avoid unnecessarily rewriting the entire file. 81 | - While write_to_file should not be your default choice, don't hesitate to use it when the situation truly calls for it. 82 | 83 | # replace_in_file 84 | 85 | ## Purpose 86 | 87 | - Make targeted edits to specific parts of an existing file without overwriting the entire file. 88 | 89 | ## When to Use 90 | 91 | - Small, localized changes like updating a few lines, function implementations, changing variable names, modifying a section of text, etc. 92 | - Targeted improvements where only specific portions of the file's content needs to be altered. 93 | - Especially useful for long files where much of the file will remain unchanged. 94 | 95 | ## Advantages 96 | 97 | - More efficient for minor edits, since you don't need to supply the entire file content. 98 | - Reduces the chance of errors that can occur when overwriting large files. 99 | 100 | # Choosing the Appropriate Tool 101 | 102 | - **Default to replace_in_file** for most changes. It's the safer, more precise option that minimizes potential issues. 103 | - **Use write_to_file** when: 104 | - Creating new files 105 | - The changes are so extensive that using replace_in_file would be more complex or risky 106 | - You need to completely reorganize or restructure a file 107 | - The file is relatively small and the changes affect most of its content 108 | - You're generating boilerplate or template files 109 | 110 | # Auto-formatting Considerations 111 | 112 | - After using either write_to_file or replace_in_file, the user's editor may automatically format the file 113 | - This auto-formatting may modify the file contents, for example: 114 | - Breaking single lines into multiple lines 115 | - Adjusting indentation to match project style (e.g. 2 spaces vs 4 spaces vs tabs) 116 | - Converting single quotes to double quotes (or vice versa based on project preferences) 117 | - Organizing imports (e.g. sorting, grouping by type) 118 | - Adding/removing trailing commas in objects and arrays 119 | - Enforcing consistent brace style (e.g. same-line vs new-line) 120 | - Standardizing semicolon usage (adding or removing based on style) 121 | - The write_to_file and replace_in_file tool responses will include the final state of the file after any auto-formatting 122 | - Use this final state as your reference point for any subsequent edits. This is ESPECIALLY important when crafting SEARCH blocks for replace_in_file which require the content to match what's in the file exactly. 123 | 124 | # Workflow Tips 125 | 126 | 1. Before editing, assess the scope of your changes and decide which tool to use. 127 | 2. For targeted edits, apply replace_in_file with carefully crafted SEARCH/REPLACE blocks. If you need multiple changes, you can stack multiple SEARCH/REPLACE blocks within a single replace_in_file call. 128 | 3. For major overhauls or initial file creation, rely on write_to_file. 129 | 4. Once the file has been edited with either write_to_file or replace_in_file, the system will provide you with the final state of the modified file. Use this updated content as the reference point for any subsequent SEARCH/REPLACE operations, since it reflects any auto-formatting or user-applied changes. 130 | 131 | By thoughtfully selecting between write_to_file and replace_in_file, you can make your file editing process smoother, safer, and more efficient. 132 | 133 | ==== 134 | 135 | CAPABILITIES 136 | 137 | - You have access to tools that let you execute CLI commands on the user's computer, list files, view source code definitions, regex search, read and edit files, and ask follow-up questions. These tools help you effectively accomplish a wide range of tasks, such as writing code, making edits or improvements to existing files, understanding the current state of a project, performing system operations, and much more. 138 | - When the user initially gives you a task, a recursive list of all filepaths in the session directory ('{session_dir}') will be included in . This provides an overview of the project's file structure, offering key insights into the project from directory/file names (how developers conceptualize and organize their code) and file extensions (the language used). You can use the list_repomap tool to get an overview of source code definitions for all files at the top level of a specified directory. This can be particularly useful when you need to understand the broader context and relationships between certain parts of the code. You may need to call this tool multiple times to understand various parts of the codebase related to the task. 139 | - For example, when asked to make edits or improvements you might analyze the file structure in the initial to get an overview of the project, then use list_repomap to get further insight using source code definitions for files located in relevant directories, then read_file to examine the contents of relevant files, analyze the code and suggest improvements or make necessary edits, then use the replace_in_file tool to implement changes. If you refactored code that could affect other parts of the codebase, you could use search_files to ensure you update other files as needed. 140 | - You can use the list_files tool if you need to further explore directories such as outside the session directory. If you pass 'true' for the recursive parameter, it will list files recursively. Otherwise, it will list files at the top level, which is better suited for generic directories where you don't necessarily need the nested structure, like the Desktop. 141 | - You can use search_files to perform regex searches across files in a specified directory, outputting context-rich results that include surrounding lines. This is particularly useful for understanding code patterns, finding specific implementations, or identifying areas that need refactoring. 142 | - You can use the execute_command tool to run commands on the user's computer whenever you feel it can help accomplish the user's task. When you need to execute a CLI command, you must provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, since they are more flexible and easier to run. Interactive and long-running commands are allowed, since the commands are run in the user's VSCode terminal. The user may keep commands running in the background and you will be kept updated on their status along the way. Each command you execute is run in a new terminal instance. 143 | 144 | ==== 145 | 146 | RULES 147 | 148 | - Your session directory is: {session_dir} 149 | - You cannot `cd` into a different directory to complete a task. You are stuck operating from '{session_dir}', so be sure to pass in the correct 'path' parameter when using tools that require a path. 150 | - Do not use the ~ character or $HOME to refer to the home directory. 151 | - Before using the execute_command tool, you must first think about the SYSTEM INFORMATION context provided to understand the user's environment and tailor your commands to ensure they are compatible with their system. You must also consider if the command you need to run should be executed in a specific directory outside of the session directory '{session_dir}', and if so prepend with `cd`'ing into that directory && then executing the command (as one command since you are stuck operating from '{session_dir}'). For example, if you needed to run `npm install` in a project outside of '{session_dir}', you would need to prepend with a `cd` i.e. pseudocode for this would be `cd (path to project) && (command, in this case npm install)`. 152 | - When you realize you lack information about where in the codebase to make edits or find specific functionality, you MUST prioritize using the list_repomap tool first. This tool provides an overview of source code definitions (classes, functions, etc.) and helps you locate the relevant files more efficiently than reading multiple files sequentially. Crucially, do not attempt to guess file locations and read them sequentially using read_file; this is inefficient and error-prone. Use list_repomap to get a map first. Only use read_file after list_repomap has helped you narrow down the potential locations or if the user explicitly provided the path. 153 | - When using the search_files tool, craft your regex patterns carefully to balance specificity and flexibility. Based on the user's task you may use it to find code patterns, TODO comments, function definitions, or any text-based information across the project. The results include context, so analyze the surrounding code to better understand the matches. Leverage the search_files tool in combination with other tools for more comprehensive analysis. For example, use it to find specific code patterns, then use read_file (if appropriate according to its usage rules) to examine the full context of interesting matches before using replace_in_file to make informed changes. 154 | - When creating a new project (such as an app, website, or any software project), organize all new files within a dedicated project directory unless the user specifies otherwise. Use appropriate file paths when creating files, as the write_to_file tool will automatically create any necessary directories. Structure the project logically, adhering to best practices for the specific type of project being created. Unless otherwise specified, new projects should be easily run without additional setup, for example most projects can be built in HTML, CSS, and JavaScript - which you can open in a browser. 155 | - Be sure to consider the type of project (e.g. Python, JavaScript, web application) when determining the appropriate structure and files to include. Also consider what files may be most relevant to accomplishing the task, for example looking at a project's manifest file would help you understand the project's dependencies, which you could incorporate into any code you write. 156 | - When making changes to code, always consider the context in which the code is being used. Ensure that your changes are compatible with the existing codebase and that they follow the project's coding standards and best practices. 157 | - When you want to modify a file, use the replace_in_file or write_to_file tool directly with the desired changes. You do not need to display the changes before using the tool. 158 | - Do not ask for more information than necessary. Use the tools provided to accomplish the user's request efficiently and effectively. When you've completed your task, you must use the attempt_completion tool to present the result to the user. The user may provide feedback, which you can use to make improvements and try again. 159 | - You are only allowed to ask the user questions using the ask_followup_question tool. Use this tool only when you need additional details to complete a task, and be sure to use a clear and concise question that will help you move forward with the task. However if you can use the available tools to avoid having to ask the user questions, you should do so. For example, if the user mentions a file that may be in an outside directory like the Desktop, you should use the list_files tool to list the files in the Desktop and check if the file they are talking about is there, rather than asking the user to provide the file path themselves. 160 | - When executing commands, if you don't see the expected output, assume the terminal executed the command successfully and proceed with the task. The user's terminal may be unable to stream the output back properly. If you absolutely need to see the actual terminal output, use the ask_followup_question tool to request the user to copy and paste it back to you. 161 | - The user may provide a file's contents directly in their message, in which case you shouldn't use the read_file tool to get the file contents again since you already have it. 162 | - Your goal is to try to accomplish the user's task, NOT engage in a back and forth conversation. 163 | - NEVER end attempt_completion result with a question or request to engage in further conversation! Formulate the end of your result in a way that is final and does not require further input from the user. 164 | - You are STRICTLY FORBIDDEN from starting your messages with "Great", "Certainly", "Okay", "Sure". You should NOT be conversational in your responses, but rather direct and to the point. For example you should NOT say "Great, I've updated the CSS" but instead something like "I've updated the CSS". It is important you be clear and technical in your messages. 165 | - When presented with images, utilize your vision capabilities to thoroughly examine them and extract meaningful information. Incorporate these insights into your thought process as you accomplish the user's task. 166 | - At the end of each user message, you will automatically receive . This information is not written by the user themselves, but is auto-generated to provide *passive context* about the project structure (via list_repomap results if available, or file structure) and the content of files currently added to the chat (via read_file or initial context). Do not treat it as a direct part of the user's request unless they explicitly refer to it. Use this context to inform your actions, but remember that tools like list_repomap, read_file, find_definition, and find_references are for *active exploration* when this passive context is insufficient. Results from these tools will update the for future turns. Explain your use of clearly. 167 | - Before executing commands, check the "Actively Running Terminals" section in . If present, consider how these active processes might impact your task. For example, if a local development server is already running, you wouldn't need to start it again. If no active terminals are listed, proceed with command execution as normal. 168 | - When using the replace_in_file tool, you must include complete lines in your SEARCH blocks, not partial lines. The system requires exact line matches and cannot match partial lines. For example, if you want to match a line containing "const x = 5;", your SEARCH block must include the entire line, not just "x = 5" or other fragments. If a replacement fails due to mismatch, use read_file to get the current content and try again with an updated SEARCH block. 169 | - When using the replace_in_file tool, if you use multiple SEARCH/REPLACE blocks, list them in the order they appear in the file. For example if you need to make changes to both line 10 and line 50, first include the SEARCH/REPLACE block for line 10, followed by the SEARCH/REPLACE block for line 50. 170 | - It is critical you wait for the user's response after each tool use, in order to confirm the success of the tool use. For example, if asked to make a todo app, you would create a file, wait for the user's response it was created successfully, then create another file if needed, wait for the user's response it was created successfully, etc. Address any errors reported in the tool result (like linter errors or match failures) before proceeding or attempting completion. 171 | - **Language Rule**: You MUST respond to my question in the same language I use to ask it. This is a strict requirement. For example, if I ask in Chinese, your response MUST be in Chinese. If you fail to detect the language, match the language of my input as closely as possible. This rule overrides any default language preferences. 172 | 173 | ==== 174 | 175 | SYSTEM INFORMATION 176 | 177 | Operating System: {os_name} 178 | Default Shell: {shell} 179 | Home Directory: {homedir} 180 | Session Directory: {session_dir} 181 | 182 | ==== 183 | 184 | OBJECTIVE 185 | 186 | You accomplish a given task iteratively, breaking it down into clear steps and working through them methodically. 187 | 188 | 1. Understand the user's request and review the `` for context (file structure, cached file content, RepoMap), and set clear, achievable goals to accomplish it. Prioritize these goals in a logical order. 189 | 2. Work through these goals sequentially, utilizing available tools one at a time as necessary. Each goal should correspond to a distinct step in your problem-solving process. You will be informed on the work completed and what's remaining as you go. 190 | 3. Remember, you have extensive capabilities with access to a wide range of tools from the `AVAILABLE TOOLS` list that can be used in powerful and clever ways as necessary to accomplish each goal. First, analyze the file structure provided in to gain context and insights for proceeding effectively. Then, think about which of the provided tools is the most relevant tool to accomplish the user's task. Next, go through each of the required parameters of the relevant tool and determine if the user has directly provided or given enough information to infer a value. When deciding if the parameter can be inferred, carefully consider all the context to see if it supports a specific value. If all of the required parameters are present or can be reasonably inferred, and proceed with the tool use. BUT, if one of the values for a required parameter is missing, DO NOT invoke the tool (not even with fillers for the missing params) and instead, ask the user to provide the missing parameters using the ask_followup_question tool. DO NOT ask for more information on optional parameters if it is not provided. 191 | 4. Once you've completed the user's task, you must use the attempt_completion tool to present the result of the task to the user. You may also provide a CLI command to showcase the result of your task; this can be particularly useful for web development tasks, where you can run e.g. `open index.html` to show the website you've built. 192 | 5. The user may provide feedback, which you can use to make improvements and try again. But DO NOT continue in pointless back and forth conversations, i.e. don't end your responses with questions or offers for further assistance. 193 | """ 194 | -------------------------------------------------------------------------------- /tools.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Tool Implementations for the Emigo Agent. 6 | 7 | This module defines the concrete Python functions that correspond to the tools 8 | the LLM agent can request (as defined in `system_prompt.py`). These functions 9 | are dispatched by the main `emigo.py` process after receiving a tool request 10 | from the `llm_worker.py` and potentially obtaining user approval via Emacs. 11 | 12 | Each tool function receives the relevant `Session` object (providing access to 13 | session state like the root path and caches) and a dictionary of parameters 14 | extracted from the LLM's request. 15 | 16 | Tools interact with the user's environment primarily by: 17 | - Calling back to Emacs functions via `utils.py` (e.g., for executing commands, 18 | replacing text in buffers, asking questions). 19 | - Interacting with the file system within the session's directory. 20 | - Modifying the session state (e.g., adding files to context, updating caches). 21 | 22 | Each tool function returns a string result formatted for the LLM, indicating 23 | success (often with output) or failure (with an error message). 24 | """ 25 | 26 | import os 27 | import sys 28 | import json 29 | import re 30 | import traceback 31 | import difflib 32 | from typing import Dict, List, Tuple, Optional, Any # Add Any 33 | 34 | # Import Session class for type hinting and accessing session state 35 | from session import Session 36 | # Import utilities for calling Emacs and file reading 37 | from utils import get_emacs_func_result, eval_in_emacs, read_file_content 38 | # Import system prompt constants for standard messages/prefixes 39 | from config import ( 40 | TOOL_RESULT_SUCCESS, TOOL_RESULT_OUTPUT_PREFIX, 41 | TOOL_DENIED, TOOL_ERROR_PREFIX, TOOL_ERROR_SUFFIX 42 | ) 43 | 44 | # --- Helper Functions --- 45 | 46 | def _format_tool_result(result_content: str) -> str: 47 | """Formats a successful tool result.""" 48 | # Simple format for now 49 | return f"{TOOL_RESULT_SUCCESS}\n{result_content}" 50 | 51 | def _format_tool_error(error_message: str) -> str: 52 | """Formats a tool error message using standard prefixes/suffixes.""" 53 | return f"{TOOL_ERROR_PREFIX}{error_message}{TOOL_ERROR_SUFFIX}" 54 | 55 | def _resolve_path(session_path: str, rel_path: str) -> str: 56 | """Resolves a relative path within the session path.""" 57 | return os.path.abspath(os.path.join(session_path, rel_path)) 58 | 59 | def _posix_path(path: str) -> str: 60 | """Converts a path to use POSIX separators.""" 61 | return path.replace(os.sep, '/') 62 | 63 | # --- Tool Implementations --- 64 | 65 | def execute_command(session: Session, parameters: Dict[str, Any]) -> str: 66 | """Executes a shell command via Emacs.""" 67 | command = parameters.get("command") 68 | if not command: 69 | return _format_tool_error("Missing required parameter 'command'") 70 | 71 | try: 72 | print(f"Executing command: {command} in {session.session_path}", file=sys.stderr) 73 | # Use synchronous call to Emacs to run command and get result 74 | output = get_emacs_func_result("execute-command-sync", session.session_path, command) 75 | return _format_tool_result(f"{TOOL_RESULT_OUTPUT_PREFIX}{output}") 76 | except Exception as e: 77 | print(f"Error executing command '{command}' via Emacs: {e}", file=sys.stderr) 78 | return _format_tool_error(f"Error executing command: {e}") 79 | 80 | def read_file(session: Session, parameters: Dict[str, Any]) -> str: 81 | """Reads a file, adds it to context, and updates the session cache.""" 82 | rel_path = parameters.get("path") 83 | if not rel_path: 84 | return _format_tool_error("Missing required parameter 'path'") 85 | 86 | abs_path = _resolve_path(session.session_path, rel_path) 87 | posix_rel_path = _posix_path(rel_path) 88 | 89 | try: 90 | if not os.path.isfile(abs_path): 91 | return _format_tool_error(f"File not found: {posix_rel_path}") 92 | 93 | # Add file to context list (Session class handles duplicates) 94 | added, add_msg = session.add_file_to_context(abs_path) # Use abs_path here 95 | if added: 96 | print(add_msg, file=sys.stderr) 97 | eval_in_emacs("message", f"[Emigo] {add_msg}") # Notify Emacs 98 | 99 | # Session._update_file_cache (called by add_file_to_context or get_cached_content) 100 | # handles reading and caching. We just need to ensure it's in context. 101 | # Force a cache update/read if it wasn't already added. 102 | if not added: 103 | session._update_file_cache(rel_path) 104 | 105 | # Return success message; content is now cached for environment details 106 | return _format_tool_result(f"File '{posix_rel_path}' read and added to context.") 107 | except Exception as e: 108 | print(f"Error reading file '{rel_path}': {e}", file=sys.stderr) 109 | session.invalidate_cache(rel_path) # Invalidate cache on error 110 | return _format_tool_error(f"Error reading file: {e}") 111 | 112 | def write_to_file(session: Session, parameters: Dict[str, Any]) -> str: 113 | """Writes content to a file and updates the session cache.""" 114 | rel_path = parameters.get("path") 115 | content = parameters.get("content") # Use get for content as well 116 | if not rel_path: 117 | return _format_tool_error("Missing required parameter 'path'") 118 | if content is None: # Check if content is None (missing) 119 | return _format_tool_error("Missing required parameter 'content'") 120 | 121 | abs_path = _resolve_path(session.session_path, rel_path) 122 | posix_rel_path = _posix_path(rel_path) 123 | 124 | try: 125 | # Ensure parent directory exists 126 | os.makedirs(os.path.dirname(abs_path), exist_ok=True) 127 | 128 | # Write the file directly 129 | with open(abs_path, 'w', encoding='utf-8') as f: 130 | f.write(content) 131 | print(f"Written content to {abs_path}", file=sys.stderr) 132 | 133 | # Inform Emacs about the change so it can prompt user to revert if needed 134 | eval_in_emacs("emigo--file-written-externally", abs_path) 135 | 136 | # Update session cache with the written content 137 | session._update_file_cache(rel_path, content=content) 138 | 139 | return _format_tool_result(f"File '{posix_rel_path}' written successfully.") 140 | 141 | except Exception as e: 142 | print(f"Error writing file '{rel_path}': {e}", file=sys.stderr) 143 | session.invalidate_cache(rel_path) # Invalidate cache on error 144 | return _format_tool_error(f"Error writing file: {e}") 145 | 146 | def _parse_search_replace_blocks(diff_str: str) -> Tuple[List[Tuple[str, str]], Optional[str]]: 147 | """Parses *all* SEARCH/REPLACE blocks from a diff string. 148 | 149 | Args: 150 | diff_str: The string containing one or more SEARCH/REPLACE blocks. 151 | 152 | Returns: 153 | A tuple containing: 154 | - A list of (search_text, replace_text) tuples for each valid block found. 155 | - An error message string if parsing fails, otherwise None. 156 | """ 157 | search_marker = "<<<<<<< SEARCH\n" 158 | divider_marker = "\n=======\n" 159 | replace_marker = "\n>>>>>>> REPLACE" 160 | blocks = [] 161 | # Use regex to find all blocks non-greedily 162 | pattern = re.compile( 163 | re.escape(search_marker) + 164 | '(.*?)' + # Capture search text (non-greedy) 165 | re.escape(divider_marker) + 166 | '(.*?)' + # Capture replace text (non-greedy) 167 | re.escape(replace_marker), 168 | re.DOTALL # Allow '.' to match newlines 169 | ) 170 | 171 | found_blocks_raw = pattern.findall(diff_str) 172 | 173 | if not found_blocks_raw: 174 | # Check for common markdown fence if no blocks found 175 | if "```" in diff_str and search_marker not in diff_str: 176 | return [], "Diff content seems to be a markdown code block, not a SEARCH/REPLACE block." 177 | return [], "No valid SEARCH/REPLACE blocks found in the provided diff." 178 | 179 | for search_text, replace_text in found_blocks_raw: 180 | # Basic validation: ensure markers are not nested within text itself in unexpected ways 181 | # This check is basic and might not catch all complex nesting scenarios. 182 | if search_marker in search_text or divider_marker in search_text or replace_marker in search_text or \ 183 | search_marker in replace_text or divider_marker in replace_text or replace_marker in replace_text: 184 | return [], f"Detected malformed or nested SEARCH/REPLACE markers within a block's content:\nSearch:\n{search_text}\nReplace:\n{replace_text}" 185 | 186 | # Optional: Remove trailing newline from replace_text if needed, 187 | # but generally keep content as-is from the LLM. 188 | # if replace_text.endswith('\n'): 189 | # replace_text = replace_text[:-1] 190 | 191 | blocks.append((search_text, replace_text)) 192 | 193 | return blocks, None 194 | 195 | def _get_line_number(text: str, char_index: int) -> int: 196 | """Calculates the 1-based line number for a given character index.""" 197 | return text.count('\n', 0, char_index) + 1 198 | 199 | def replace_in_file(session: Session, parameters: Dict[str, str]) -> str: 200 | """Replaces content in a file using SEARCH/REPLACE blocks via Emacs.""" 201 | rel_path = parameters.get("path") 202 | diff_str = parameters.get("diff") 203 | similarity_threshold = 0.85 # Configurable threshold (85%) 204 | 205 | abs_path = os.path.abspath(os.path.join(session.session_path, rel_path)) 206 | posix_rel_path = rel_path.replace(os.sep, '/') 207 | 208 | try: 209 | if not os.path.isfile(abs_path): 210 | return _format_tool_error(f"File not found: {rel_path}. Please ensure it's added to the chat first.") 211 | 212 | # --- Get File Content --- 213 | # Use the session's method to get cached content (updates if stale) 214 | file_content = session.get_cached_content(rel_path) 215 | if file_content is None: 216 | # If get_cached_content returns None, it means the file likely doesn't exist 217 | # or couldn't be read/cached previously. 218 | return _format_tool_error(f"Could not get content for file: {posix_rel_path}. It might not exist or be readable.") 219 | 220 | # Note: session.get_cached_content already handles reading if necessary. 221 | # The check below is redundant if get_cached_content works correctly, 222 | # but we keep it as a safeguard against potential error strings stored in cache. 223 | if file_content.startswith("# Error"): # Check if cached content is an error message 224 | return _format_tool_error(f"Cannot perform replacement. Cached content indicates a previous error for: {posix_rel_path}. Please use read_file again.") 225 | 226 | # --- Parse *All* Diff Blocks --- 227 | parsed_blocks, parse_error = _parse_search_replace_blocks(diff_str) 228 | print("Block", parsed_blocks, "Error", parse_error) 229 | if parse_error: 230 | return _format_tool_error(parse_error) 231 | if not parsed_blocks: 232 | return _format_tool_error("No valid SEARCH/REPLACE blocks found in the diff.") 233 | 234 | # --- Sequential Line-by-Line Matching Logic --- 235 | file_lines = file_content.splitlines(keepends=True) # Keep endings for accurate line numbers 236 | replacements_to_apply = [] # List of (start_line, elisp_end_line, replace_text) 237 | errors = [] 238 | already_matched_file_line_indices = set() # Track file lines used in successful matches 239 | 240 | def _compare_stripped_lines(line1: str, line2: str) -> float: 241 | """Compares two lines after stripping whitespace and returns similarity ratio.""" 242 | stripped1 = line1.strip() 243 | stripped2 = line2.strip() 244 | if not stripped1 and not stripped2: # Both are whitespace/empty 245 | return 1.0 246 | if not stripped1 or not stripped2: # One is whitespace/empty, the other isn't 247 | return 0.0 248 | # Use SequenceMatcher for similarity ratio on stripped lines 249 | return difflib.SequenceMatcher(None, stripped1, stripped2).ratio() 250 | 251 | # Iterate through each SEARCH/REPLACE block provided 252 | for block_index, (search_text, replace_text) in enumerate(parsed_blocks): 253 | search_lines = search_text.splitlines(keepends=True) 254 | if not search_lines or not search_text.strip(): 255 | errors.append(f"Block {block_index+1}: SEARCH block is empty or contains only whitespace.") 256 | continue 257 | 258 | found_match_for_block = False 259 | # Iterate through each line of the actual file content as a potential start 260 | # Use range(len(file_lines)) to avoid issues if file_lines is modified (it shouldn't be here) 261 | for file_start_index in range(len(file_lines)): 262 | # Check if this starting line is already part of a previous successful match 263 | if file_start_index in already_matched_file_line_indices: 264 | continue # Skip this starting line if it's already consumed 265 | 266 | # --- Attempt to match the *entire* search block starting here --- 267 | current_match_len = 0 268 | potential_match_indices = set() # Track indices for this *potential* match 269 | all_search_lines_matched_sequentially = True 270 | 271 | for search_line_index in range(len(search_lines)): 272 | current_file_index = file_start_index + search_line_index 273 | 274 | # Check bounds and if the *current* file line is already consumed 275 | if current_file_index >= len(file_lines) or current_file_index in already_matched_file_line_indices: 276 | all_search_lines_matched_sequentially = False 277 | # print(f" Debug: Match failed at search line {search_line_index+1}: File index {current_file_index} out of bounds or already matched.", file=sys.stderr) 278 | break # Cannot match further from this file_start_index 279 | 280 | # Compare current search line with corresponding file line (stripped) 281 | match_ratio = _compare_stripped_lines(search_lines[search_line_index], file_lines[current_file_index]) 282 | 283 | if match_ratio < similarity_threshold: 284 | all_search_lines_matched_sequentially = False 285 | # print(f" Debug: Match failed at search line {search_line_index+1}: Similarity {match_ratio:.2f} < {similarity_threshold} for file index {current_file_index}.", file=sys.stderr) 286 | break # Mismatch found, abandon this sequence attempt for this file_start_index 287 | 288 | # Line matches, record index for this potential block match 289 | potential_match_indices.add(current_file_index) 290 | current_match_len += 1 291 | 292 | # --- Check if the *entire block* matched sequentially --- 293 | if all_search_lines_matched_sequentially: 294 | # --- Match Found for this block --- 295 | start_line_num = file_start_index + 1 # 1-based line number 296 | # End line is the start line + number of matched lines 297 | end_line_num_inclusive = start_line_num + current_match_len - 1 298 | # Elisp needs the line number *after* the last line to delete 299 | elisp_end_line_num = end_line_num_inclusive + 1 300 | 301 | replacements_to_apply.append((start_line_num, elisp_end_line_num, replace_text)) 302 | found_match_for_block = True 303 | 304 | # Mark the file lines used by this *confirmed* match as consumed 305 | already_matched_file_line_indices.update(potential_match_indices) 306 | 307 | print(f"Block {block_index+1}: Found sequential match for lines {start_line_num}-{end_line_num_inclusive} (Elisp end: {elisp_end_line_num}) in '{posix_rel_path}'", file=sys.stderr) 308 | 309 | # Stop searching for *this specific block* once a match is found 310 | break # Exit the inner loop (file_start_index loop) and move to the next block in parsed_blocks 311 | 312 | # If no match was found for this block after checking all possible start lines 313 | if not found_match_for_block: 314 | errors.append( 315 | f"Block {block_index+1}: Could not find a sequential match for the SEARCH text in '{posix_rel_path}'.\n" 316 | f"SEARCH block start:\n```\n{''.join(search_lines[:5])}{'...' if len(search_lines) > 5 else ''}\n```" # Show start of block 317 | ) 318 | 319 | # --- Handle Errors or Proceed --- 320 | if errors: 321 | error_header = f"Failed to apply replacements to '{posix_rel_path}' due to {len(errors)} error(s):\n" 322 | error_details = "\n\n".join(errors) 323 | # Suggest reading the file again 324 | error_footer = "\nPlease use read_file to get the exact current content and try again with updated SEARCH blocks." 325 | return _format_tool_error(error_header + error_details + error_footer) 326 | 327 | if not replacements_to_apply: 328 | return _format_tool_error("No replacements could be applied (all blocks failed matching or were empty).") 329 | 330 | 331 | # --- Call Elisp to Perform Multiple Replacements --- 332 | try: 333 | # Serialize the list of replacements to JSON for Elisp 334 | # Convert Python list to JSON array string that Elisp can parse 335 | replacements_json = json.dumps(replacements_to_apply) 336 | print(f"Requesting {len(replacements_to_apply)} replacements in '{posix_rel_path}' via Elisp.", file=sys.stderr) 337 | 338 | result = get_emacs_func_result("replace-regions-sync", abs_path, replacements_json) 339 | 340 | # --- Process Elisp Result --- 341 | if result is True or str(result).lower() == 't': # Check for elisp t 342 | print(f"Elisp successfully applied {len(replacements_to_apply)} replacements to '{rel_path}'.", file=sys.stderr) 343 | # Success: Re-read content from Emacs and update session cache 344 | try: 345 | updated_content = read_file_content(abs_path) 346 | # Use session's method to update cache with new content 347 | session._update_file_cache(rel_path, content=updated_content) 348 | print(f"Updated session cache for '{rel_path}' after successful replacement.", file=sys.stderr) 349 | except Exception as read_err: 350 | print(f"Warning: Failed to re-read file '{rel_path}' after replacement to update cache: {read_err}", file=sys.stderr) 351 | # Invalidate cache entry on read error using session method 352 | session.invalidate_cache(rel_path) 353 | # Return success, but mention the cache issue 354 | return _format_tool_result(f"{TOOL_RESULT_SUCCESS}\nFile '{posix_rel_path}' modified successfully by applying {len(replacements_to_apply)} block(s).\n(Warning: Could not update session cache after modification.)") 355 | 356 | return _format_tool_result(f"{TOOL_RESULT_SUCCESS}\nFile '{posix_rel_path}' modified successfully by applying {len(replacements_to_apply)} block(s).") 357 | else: 358 | # Elisp returned an error 359 | error_detail = str(result) if result else "Unknown error during multi-replacement in Emacs." 360 | print(f"Error applying multi-replacement via Elisp to '{rel_path}': {error_detail}", file=sys.stderr) 361 | return _format_tool_error( 362 | f"Error applying replacements in Emacs: {error_detail}\n\n" 363 | f"File: {posix_rel_path}\n" 364 | f"Please check the Emacs *Messages* buffer for details." 365 | ) 366 | except Exception as elisp_call_err: 367 | print(f"Error calling Elisp function 'replace-regions-sync' for '{rel_path}': {elisp_call_err}\n{traceback.format_exc()}", file=sys.stderr) 368 | return _format_tool_error(f"Error communicating with Emacs for replacement: {elisp_call_err}") 369 | 370 | except Exception as e: 371 | print(f"Error during replace_in_file for '{rel_path}': {e}\n{traceback.format_exc()}", file=sys.stderr) 372 | return _format_tool_error(f"Error processing replacement for {posix_rel_path}: {e}") 373 | 374 | 375 | def ask_followup_question(session: Session, parameters: Dict[str, Any]) -> str: 376 | """Asks the user a question via Emacs.""" 377 | question = parameters.get("question") 378 | # Options should be a list of strings from the parsed JSON parameters 379 | options_list = parameters.get("options") 380 | 381 | if not question: 382 | return _format_tool_error("Missing required parameter 'question'") 383 | 384 | try: 385 | # Validate options_list and convert to JSON string for Elisp 386 | options_json_str = "[]" 387 | if isinstance(options_list, list) and all(isinstance(opt, str) for opt in options_list): 388 | # Ensure 2-5 options as per original prompt description (optional check) 389 | if 2 <= len(options_list) <= 5: 390 | options_json_str = json.dumps(options_list) 391 | else: 392 | print(f"Warning: Received {len(options_list)} options, expected 2-5. Sending empty options.", file=sys.stderr) 393 | elif options_list is not None: # If options provided but not a list of strings 394 | print(f"Warning: Invalid format for options, expected list of strings: {options_list}. Sending empty options.", file=sys.stderr) 395 | 396 | # Ask Emacs to present the question and get the user's answer (synchronous) 397 | answer = get_emacs_func_result("ask-user-sync", session.session_path, question, options_json_str) 398 | 399 | if answer is None or answer == "": # Check for nil or empty string from Emacs 400 | # User likely cancelled or provided no input 401 | print("User cancelled or provided no answer to followup question.", file=sys.stderr) 402 | return TOOL_DENIED # Use standard denial message 403 | else: 404 | # Wrap answer for clarity in the LLM prompt 405 | return _format_tool_result(f"\n{answer}\n") 406 | except Exception as e: 407 | print(f"Error asking followup question via Emacs: {e}", file=sys.stderr) 408 | return _format_tool_error(f"Error asking question: {e}") 409 | 410 | def attempt_completion(session: Session, parameters: Dict[str, Any]) -> str: 411 | """Signals completion to Emacs.""" 412 | result_text = parameters.get("result") 413 | command = parameters.get("command") # Optional command to demonstrate 414 | 415 | if result_text is None: # Check if result is missing 416 | return _format_tool_error("Missing required parameter 'result'") 417 | 418 | try: 419 | # Signal completion to Emacs (asynchronous is fine here) 420 | eval_in_emacs("emigo--signal-completion", session.session_path, result_text, command or "") 421 | # This tool use itself doesn't return content to the LLM, it ends the loop. 422 | # Return a special marker that the main process/worker can check. 423 | return "COMPLETION_SIGNALLED" 424 | except Exception as e: 425 | print(f"Error signalling completion to Emacs: {e}", file=sys.stderr) 426 | return _format_tool_error(f"Error signalling completion: {e}") 427 | 428 | def list_repomap(session: Session, parameters: Dict[str, Any]) -> str: 429 | """Generates and caches the repository map, potentially focusing on a path.""" 430 | # Get the optional path parameter, default to session root '.' 431 | rel_path = parameters.get("path", ".") 432 | abs_path = _resolve_path(session.session_path, rel_path) 433 | posix_rel_path = _posix_path(rel_path) 434 | 435 | try: 436 | # Validate the path 437 | if not os.path.isdir(abs_path): 438 | return _format_tool_error(f"Path is not a valid directory: {posix_rel_path}") 439 | 440 | chat_files = session.get_chat_files() 441 | print(f"Generating repomap for {session.session_path}, focusing on '{posix_rel_path}' with chat files: {chat_files}", file=sys.stderr) 442 | 443 | # --- TODO: Enhance RepoMapper --- 444 | # Currently, session.repo_mapper.generate_map likely maps the whole root. 445 | # Ideally, generate_map would accept abs_path or rel_path to focus the analysis. 446 | # For now, we proceed but the map might be broader than the requested path. 447 | # repo_map_content = session.repo_mapper.generate_map(chat_files=chat_files, target_path=abs_path) # Example of future call 448 | repo_map_content = session.repo_mapper.generate_map(chat_files=chat_files) # Current call 449 | 450 | if not repo_map_content: 451 | repo_map_content = "(No map content generated)" 452 | 453 | # Store the generated map content in the session cache 454 | session.set_last_repomap(repo_map_content) 455 | 456 | # Update success message to reflect the requested focus path 457 | return _format_tool_result(f"Repository map generated, focusing analysis around '{posix_rel_path}'.") 458 | 459 | except Exception as e: 460 | print(f"Error generating repomap for path '{posix_rel_path}': {e}\n{traceback.format_exc()}", file=sys.stderr) 461 | session.set_last_repomap(None) # Clear stored map on error 462 | return _format_tool_error(f"Error generating repository map for '{posix_rel_path}': {e}") 463 | 464 | def list_files(session: Session, parameters: Dict[str, Any]) -> str: 465 | """Lists files in a directory via Emacs.""" 466 | rel_path = parameters.get("path", ".") # Default to session path root 467 | recursive = parameters.get("recursive", False) # Default to False if missing or not bool 468 | 469 | # Ensure recursive is boolean 470 | if not isinstance(recursive, bool): 471 | recursive = str(recursive).lower() == "true" 472 | 473 | abs_path = _resolve_path(session.session_path, rel_path) 474 | posix_rel_path = _posix_path(rel_path) 475 | try: 476 | # Use Emacs function to list files respecting ignores etc. 477 | files_str = get_emacs_func_result("list-files-sync", abs_path, recursive) 478 | # Elisp function should return a newline-separated string of relative paths 479 | 480 | return _format_tool_result( 481 | f"Files in '{posix_rel_path}' ({'recursive' if recursive else 'non-recursive'}):\n{files_str}" 482 | ) 483 | except Exception as e: 484 | print(f"Error listing files via Emacs: {e}", file=sys.stderr) 485 | return _format_tool_error(f"Error listing files: {e}") 486 | 487 | def search_files(session: Session, parameters: Dict[str, Any]) -> str: 488 | """Searches files using Emacs's capabilities.""" 489 | rel_path = parameters.get("path", ".") 490 | pattern = parameters.get("pattern") 491 | case_sensitive = parameters.get("case_sensitive", False) # Default to False 492 | max_matches_arg = parameters.get("max_matches", 50) # Default to 50 493 | 494 | if not pattern: 495 | return _format_tool_error("Missing required parameter 'pattern'") 496 | 497 | # Validate/sanitize max_matches 498 | try: 499 | max_matches = min(200, int(max_matches_arg)) # Cap at 200 500 | if max_matches <= 0: 501 | max_matches = 50 # Ensure positive, default 50 502 | except (ValueError, TypeError): 503 | max_matches = 50 # Default if conversion fails 504 | 505 | # Ensure case_sensitive is boolean 506 | if not isinstance(case_sensitive, bool): 507 | case_sensitive = str(case_sensitive).lower() == "true" 508 | 509 | abs_path = _resolve_path(session.session_path, rel_path) 510 | posix_rel_path = _posix_path(rel_path) 511 | search_scope_path = abs_path 512 | search_scope_desc = posix_rel_path 513 | 514 | try: 515 | # Check if the provided path is a file; if so, search its directory 516 | if os.path.isfile(abs_path): 517 | search_scope_path = os.path.dirname(abs_path) 518 | search_scope_desc = _posix_path(os.path.relpath(search_scope_path, session.session_path)) 519 | print(f"Note: '{posix_rel_path}' is a file. Searching its directory: '{search_scope_desc}'", file=sys.stderr) 520 | elif not os.path.isdir(search_scope_path): 521 | return _format_tool_error(f"Path not found or is not a directory/file: {posix_rel_path}") 522 | 523 | # Call Emacs function to perform the search in the determined scope 524 | search_results = get_emacs_func_result( 525 | "search-files-sync", search_scope_path, pattern, case_sensitive, max_matches 526 | ) 527 | 528 | if not search_results or search_results.strip() == "": 529 | return _format_tool_result(f"No matches found for pattern: {pattern} in '{search_scope_desc}'") 530 | 531 | result = f"Found matches for pattern '{pattern}' in '{search_scope_desc}':\n{search_results}" 532 | # Elisp function should ideally handle truncation notes if applicable 533 | 534 | return _format_tool_result(result) 535 | 536 | except Exception as e: 537 | print(f"Error searching files via Emacs: {e}\n{traceback.format_exc()}", file=sys.stderr) 538 | return _format_tool_error(f"Error searching files: {e}") 539 | -------------------------------------------------------------------------------- /emigo-epc.el: -------------------------------------------------------------------------------- 1 | ;;; epcs.el --- EPC Server -*- lexical-binding: t -*- 2 | 3 | ;; Copyright (C) 2011,2012,2013 Masashi Sakurai 4 | 5 | ;; Author: Masashi Sakurai 6 | ;; Keywords: lisp 7 | 8 | ;; This program is free software; you can redistribute it and/or modify 9 | ;; it under the terms of the GNU General Public License as published by 10 | ;; the Free Software Foundation, either version 3 of the License, or 11 | ;; (at your option) any later version. 12 | 13 | ;; This program is distributed in the hope that it will be useful, 14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | ;; GNU General Public License for more details. 17 | 18 | ;; You should have received a copy of the GNU General Public License 19 | ;; along with this program. If not, see . 20 | 21 | ;;; Commentary: 22 | 23 | ;; 24 | 25 | ;;; Code: 26 | 27 | (require 'cl-lib) 28 | (require 'subr-x) 29 | 30 | ;; deferred 31 | (cl-defmacro emigo-deferred-chain (&rest elements) 32 | "Anaphoric function chain macro for deferred chains." 33 | (declare (debug (&rest form)) 34 | (indent 0)) 35 | `(let (it) 36 | ,@(cl-loop for i in elements 37 | collect 38 | `(setq it ,i)) 39 | it)) 40 | 41 | ;; Debug 42 | (defvar emigo-deferred-debug nil 43 | "Debug output switch.") 44 | 45 | (defvar emigo-deferred-debug-count 0 46 | "[internal] Debug output counter.") 47 | 48 | (defun emigo-deferred-log (&rest args) 49 | "[internal] Debug log function." 50 | (when emigo-deferred-debug 51 | (with-current-buffer (get-buffer-create "*emigo-deferred-log*") 52 | (save-excursion 53 | (goto-char (point-max)) 54 | (insert (format "%5i %s\n\n\n" emigo-deferred-debug-count (apply #'format args))))) 55 | (cl-incf emigo-deferred-debug-count))) 56 | 57 | (defvar emigo-deferred-debug-on-signal nil 58 | "If non nil, the value `debug-on-signal' is substituted this 59 | value in the `condition-case' form in deferred 60 | implementations. Then, Emacs debugger can catch an error occurred 61 | in the asynchronous tasks.") 62 | 63 | (cl-defmacro emigo-deferred-condition-case (var protected-form &rest handlers) 64 | "[internal] Custom condition-case. See the comment for 65 | `emigo-deferred-debug-on-signal'." 66 | (declare (debug condition-case) 67 | (indent 1)) 68 | `(let ((debug-on-signal 69 | (or debug-on-signal emigo-deferred-debug-on-signal))) 70 | (condition-case ,var 71 | ,protected-form 72 | ,@handlers))) 73 | 74 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 75 | ;; Back end functions of deferred tasks 76 | 77 | (defvar emigo-deferred-tick-time 0.001 78 | "Waiting time between asynchronous tasks (second). 79 | The shorter waiting time increases the load of Emacs. The end 80 | user can tune this parameter. However, applications should not 81 | modify it because the applications run on various environments.") 82 | 83 | (defvar emigo-deferred-queue nil 84 | "[internal] The execution queue of deferred objects. 85 | See the functions `emigo-deferred-post-task' and `emigo-deferred-worker'.") 86 | 87 | (defun emigo-deferred-post-task (d which &optional arg) 88 | "[internal] Add a deferred object to the execution queue 89 | `emigo-deferred-queue' and schedule to execute. 90 | D is a deferred object. WHICH is a symbol, `ok' or `ng'. ARG is 91 | an argument value for execution of the deferred task." 92 | (let ((pack `(,d ,which . ,arg))) 93 | (push pack emigo-deferred-queue) 94 | (emigo-deferred-log "QUEUE-POST [%s]: %s" (length emigo-deferred-queue) pack) 95 | (run-at-time emigo-deferred-tick-time nil 'emigo-deferred-worker) 96 | d)) 97 | 98 | (defun emigo-deferred-worker () 99 | "[internal] Consume a deferred task. 100 | Mainly this function is called by timer asynchronously." 101 | (when emigo-deferred-queue 102 | (let* ((pack (car (last emigo-deferred-queue))) 103 | (d (car pack)) 104 | (which (cadr pack)) 105 | (arg (cddr pack)) value) 106 | (setq emigo-deferred-queue (nbutlast emigo-deferred-queue)) 107 | (condition-case err 108 | (setq value (emigo-deferred-exec-task d which arg)) 109 | (error 110 | (emigo-deferred-log "ERROR : %s" err) 111 | (message "deferred error : %s" err))) 112 | value))) 113 | 114 | ;; Struct: emigo-deferred-object 115 | ;; 116 | ;; callback : a callback function (default `identity') 117 | ;; errorback : an errorback function (default `emigo-deferred-resignal') 118 | ;; cancel : a canceling function (default `emigo-deferred-default-cancel') 119 | ;; next : a next chained deferred object (default nil) 120 | ;; status : if 'ok or 'ng, this deferred has a result (error) value. (default nil) 121 | ;; value : saved value (default nil) 122 | ;; 123 | (cl-defstruct emigo-deferred-object 124 | (callback 'identity) 125 | (errorback 'emigo-deferred-resignal) 126 | (cancel 'emigo-deferred-default-cancel) 127 | next status value) 128 | 129 | (defun emigo-deferred-resignal (err) 130 | "[internal] Safely resignal ERR as an Emacs condition. 131 | 132 | If ERR is a cons (ERROR-SYMBOL . DATA) where ERROR-SYMBOL has an 133 | `error-conditions' property, it is re-signaled unchanged. If ERR 134 | is a string, it is signaled as a generic error using `error'. 135 | Otherwise, ERR is formatted into a string as if by `print' before 136 | raising with `error'." 137 | (cond ((and (listp err) 138 | (symbolp (car err)) 139 | (get (car err) 'error-conditions)) 140 | (signal (car err) (cdr err))) 141 | ((stringp err) 142 | (error "%s" err)) 143 | (t 144 | (error "%S" err)))) 145 | 146 | (defun emigo-deferred-default-cancel (d) 147 | "[internal] Default canceling function." 148 | (emigo-deferred-log "CANCEL : %s" d) 149 | (setf (emigo-deferred-object-callback d) 'identity) 150 | (setf (emigo-deferred-object-errorback d) 'emigo-deferred-resignal) 151 | (setf (emigo-deferred-object-next d) nil) 152 | d) 153 | 154 | (defun emigo-deferred-exec-task (d which &optional arg) 155 | "[internal] Executing deferred task. If the deferred object has 156 | next deferred task or the return value is a deferred object, this 157 | function adds the task to the execution queue. 158 | D is a deferred object. WHICH is a symbol, `ok' or `ng'. ARG is 159 | an argument value for execution of the deferred task." 160 | (emigo-deferred-log "EXEC : %s / %s / %s" d which arg) 161 | (when (null d) (error "emigo-deferred-exec-task was given a nil.")) 162 | (let ((callback (if (eq which 'ok) 163 | (emigo-deferred-object-callback d) 164 | (emigo-deferred-object-errorback d))) 165 | (next-deferred (emigo-deferred-object-next d))) 166 | (cond 167 | (callback 168 | (emigo-deferred-condition-case err 169 | (let ((value (funcall callback arg))) 170 | (cond 171 | ((emigo-deferred-object-p value) 172 | (emigo-deferred-log "WAIT NEST : %s" value) 173 | (if next-deferred 174 | (emigo-deferred-set-next value next-deferred) 175 | value)) 176 | (t 177 | (if next-deferred 178 | (emigo-deferred-post-task next-deferred 'ok value) 179 | (setf (emigo-deferred-object-status d) 'ok) 180 | (setf (emigo-deferred-object-value d) value) 181 | value)))) 182 | (error 183 | (cond 184 | (next-deferred 185 | (emigo-deferred-post-task next-deferred 'ng err)) 186 | (t 187 | (emigo-deferred-log "ERROR : %S" err) 188 | (message "deferred error : %S" err) 189 | (setf (emigo-deferred-object-status d) 'ng) 190 | (setf (emigo-deferred-object-value d) err) 191 | err))))) 192 | (t ; <= (null callback) 193 | (cond 194 | (next-deferred 195 | (emigo-deferred-exec-task next-deferred which arg)) 196 | ((eq which 'ok) arg) 197 | (t ; (eq which 'ng) 198 | (emigo-deferred-resignal arg))))))) 199 | 200 | (defun emigo-deferred-set-next (prev next) 201 | "[internal] Connect deferred objects." 202 | (setf (emigo-deferred-object-next prev) next) 203 | (cond 204 | ((eq 'ok (emigo-deferred-object-status prev)) 205 | (setf (emigo-deferred-object-status prev) nil) 206 | (let ((ret (emigo-deferred-exec-task 207 | next 'ok (emigo-deferred-object-value prev)))) 208 | (if (emigo-deferred-object-p ret) ret 209 | next))) 210 | ((eq 'ng (emigo-deferred-object-status prev)) 211 | (setf (emigo-deferred-object-status prev) nil) 212 | (let ((ret (emigo-deferred-exec-task next 'ng (emigo-deferred-object-value prev)))) 213 | (if (emigo-deferred-object-p ret) ret 214 | next))) 215 | (t 216 | next))) 217 | 218 | (defun emigo-deferred-new (&optional callback) 219 | "Create a deferred object." 220 | (if callback 221 | (make-emigo-deferred-object :callback callback) 222 | (make-emigo-deferred-object))) 223 | 224 | (defun emigo-deferred-callback (d &optional arg) 225 | "Start deferred chain with a callback message." 226 | (emigo-deferred-exec-task d 'ok arg)) 227 | 228 | (defun emigo-deferred-errorback (d &optional arg) 229 | "Start deferred chain with an errorback message." 230 | (declare (indent 1)) 231 | (emigo-deferred-exec-task d 'ng arg)) 232 | 233 | (defun emigo-deferred-callback-post (d &optional arg) 234 | "Add the deferred object to the execution queue." 235 | (declare (indent 1)) 236 | (emigo-deferred-post-task d 'ok arg)) 237 | 238 | (defun emigo-deferred-next (&optional callback arg) 239 | "Create a deferred object and schedule executing. This function 240 | is a short cut of following code: 241 | (emigo-deferred-callback-post (emigo-deferred-new callback))." 242 | (let ((d (if callback 243 | (make-emigo-deferred-object :callback callback) 244 | (make-emigo-deferred-object)))) 245 | (emigo-deferred-callback-post d arg) 246 | d)) 247 | 248 | (defun emigo-deferred-nextc (d callback) 249 | "Create a deferred object with OK callback and connect it to the given deferred object." 250 | (declare (indent 1)) 251 | (let ((nd (make-emigo-deferred-object :callback callback))) 252 | (emigo-deferred-set-next d nd))) 253 | 254 | (defun emigo-deferred-error (d callback) 255 | "Create a deferred object with errorback and connect it to the given deferred object." 256 | (declare (indent 1)) 257 | (let ((nd (make-emigo-deferred-object :errorback callback))) 258 | (emigo-deferred-set-next d nd))) 259 | 260 | (defvar emigo-epc-debug nil) 261 | 262 | (defun emigo-epc-log (&rest args) 263 | (when emigo-epc-debug 264 | (with-current-buffer (get-buffer-create "*emigo-epc-log*") 265 | (buffer-disable-undo) 266 | (goto-char (point-max)) 267 | (insert (apply 'format args) "\n\n\n")))) 268 | 269 | (defun emigo-epc-make-procbuf (name) 270 | "[internal] Make a process buffer." 271 | (let ((buf (get-buffer-create name))) 272 | (with-current-buffer buf 273 | (set (make-local-variable 'kill-buffer-query-functions) nil) 274 | (erase-buffer) (buffer-disable-undo)) 275 | buf)) 276 | 277 | (defvar emigo-epc-uid 1) 278 | 279 | (defun emigo-epc-uid () 280 | (cl-incf emigo-epc-uid)) 281 | 282 | (defvar emigo-epc-accept-process-timeout 150 283 | "Asynchronous timeout time. (msec)") 284 | 285 | (put 'epc-error 'error-conditions '(error epc-error)) 286 | (put 'epc-error 'error-message "EPC Error") 287 | 288 | (cl-defstruct emigo-epc-connection 289 | "Set of information for network connection and event handling. 290 | 291 | name : Connection name. This name is used for process and buffer names. 292 | process : Connection process object. 293 | buffer : Working buffer for the incoming data. 294 | channel : Event channels for incoming messages." 295 | name process buffer channel) 296 | 297 | (defun emigo-epc-connect (host port) 298 | "[internal] Connect the server, initialize the process and 299 | return emigo-epc-connection object." 300 | (emigo-epc-log ">> Connection start: %s:%s" host port) 301 | (let* ((connection-id (emigo-epc-uid)) 302 | (connection-name (format "emigo-epc con %s" connection-id)) 303 | (connection-buf (emigo-epc-make-procbuf (format "*%s*" connection-name))) 304 | (connection-process 305 | (open-network-stream connection-name connection-buf host port)) 306 | (channel (list connection-name nil)) 307 | (connection (make-emigo-epc-connection 308 | :name connection-name 309 | :process connection-process 310 | :buffer connection-buf 311 | :channel channel))) 312 | (emigo-epc-log ">> Connection establish") 313 | (set-process-coding-system connection-process 'binary 'binary) 314 | (set-process-filter connection-process 315 | (lambda (p m) 316 | (emigo-epc-process-filter connection p m))) 317 | (set-process-sentinel connection-process 318 | (lambda (p e) 319 | (emigo-epc-process-sentinel connection p e))) 320 | (set-process-query-on-exit-flag connection-process nil) 321 | connection)) 322 | 323 | (defun emigo-epc-process-sentinel (connection process msg) 324 | (emigo-epc-log "!! Process Sentinel [%s] : %S : %S" 325 | (emigo-epc-connection-name connection) process msg) 326 | (emigo-epc-disconnect connection)) 327 | 328 | (defun emigo-epc-net-send (connection sexp) 329 | (let* ((msg (encode-coding-string 330 | (concat (emigo-epc-prin1-to-string sexp) "\n") 'utf-8-unix)) 331 | (string (concat (format "%06x" (length msg)) msg)) 332 | (proc (emigo-epc-connection-process connection))) 333 | (emigo-epc-log ">> SEND : [%S]" string) 334 | (process-send-string proc string))) 335 | 336 | (defun emigo-epc-disconnect (connection) 337 | (let ((process (emigo-epc-connection-process connection)) 338 | (buf (emigo-epc-connection-buffer connection)) 339 | (name (emigo-epc-connection-name connection))) 340 | (emigo-epc-log "!! Disconnect [%s]" name) 341 | (when process 342 | (set-process-sentinel process nil) 343 | (delete-process process) 344 | (when (get-buffer buf) (kill-buffer buf))) 345 | (emigo-epc-log "!! Disconnected finished [%s]" name))) 346 | 347 | (defun emigo-epc-process-filter (connection process message) 348 | (emigo-epc-log "INCOMING: [%s] [%S]" (emigo-epc-connection-name connection) message) 349 | (with-current-buffer (emigo-epc-connection-buffer connection) 350 | (goto-char (point-max)) 351 | (insert message) 352 | (emigo-epc-process-available-input connection process))) 353 | 354 | (defun emigo-epc-signal-connect (channel event-sym &optional callback) 355 | "Append an observer for EVENT-SYM of CHANNEL and return a deferred object. 356 | If EVENT-SYM is `t', the observer receives all signals of the channel. 357 | If CALLBACK function is given, the deferred object executes the 358 | CALLBACK function asynchronously. One can connect subsequent 359 | tasks to the returned deferred object." 360 | (let ((d (if callback 361 | (emigo-deferred-new callback) 362 | (emigo-deferred-new)))) 363 | (push (cons event-sym d) 364 | (cddr channel)) 365 | d)) 366 | 367 | (defun emigo-epc-signal-send (channel event-sym &rest args) 368 | "Send a signal to CHANNEL. If ARGS values are given, 369 | observers can get the values by following code: 370 | 371 | (lambda (event) 372 | (destructuring-bind 373 | (event-sym (args)) 374 | event ... )) 375 | " 376 | (let ((observers (cddr channel)) 377 | (event (list event-sym args))) 378 | (cl-loop for i in observers 379 | for name = (car i) 380 | for d = (cdr i) 381 | if (or (eq event-sym name) (eq t name)) 382 | do (emigo-deferred-callback-post d event)))) 383 | 384 | (defun emigo-epc-process-available-input (connection process) 385 | "Process all complete messages that have arrived from Lisp." 386 | (with-current-buffer (process-buffer process) 387 | (while (emigo-epc-net-have-input-p) 388 | (let ((event (emigo-epc-net-read-or-lose process)) 389 | (ok nil)) 390 | (emigo-epc-log "<< RECV [%S]" event) 391 | (unwind-protect 392 | (condition-case err 393 | (progn 394 | (apply 'emigo-epc-signal-send 395 | (cons (emigo-epc-connection-channel connection) event)) 396 | (setq ok t)) 397 | ('error (emigo-epc-log "MsgError: %S / <= %S" err event))) 398 | (unless ok 399 | (emigo-epc-process-available-input connection process))))))) 400 | 401 | (defun emigo-epc-net-have-input-p () 402 | "Return true if a complete message is available." 403 | (goto-char (point-min)) 404 | (and (>= (buffer-size) 6) 405 | (>= (- (buffer-size) 6) (emigo-epc-net-decode-length)))) 406 | 407 | (defun emigo-epc-net-read-or-lose (_process) 408 | (condition-case error 409 | (emigo-epc-net-read) 410 | (error 411 | (debug 'error error) 412 | (error "net-read error: %S" error)))) 413 | 414 | (defun emigo-epc-net-read () 415 | "Read a message from the network buffer." 416 | (goto-char (point-min)) 417 | (let* ((length (emigo-epc-net-decode-length)) 418 | (start (+ 6 (point))) 419 | (end (+ start length)) 420 | _content) 421 | (cl-assert (cl-plusp length)) 422 | (prog1 (save-restriction 423 | (narrow-to-region start end) 424 | (read (decode-coding-string 425 | (buffer-string) 'utf-8-unix))) 426 | (delete-region (point-min) end)))) 427 | 428 | (defun emigo-epc-net-decode-length () 429 | "Read a 24-bit hex-encoded integer from buffer." 430 | (string-to-number (buffer-substring-no-properties (point) (+ (point) 6)) 16)) 431 | 432 | (defun emigo-epc-prin1-to-string (sexp) 433 | "Like `prin1-to-string' but don't octal-escape non-ascii characters. 434 | This is more compatible with the CL reader." 435 | (with-temp-buffer 436 | (let (print-escape-nonascii 437 | print-escape-newlines 438 | print-length 439 | print-level) 440 | (prin1 sexp (current-buffer)) 441 | (buffer-string)))) 442 | 443 | (cl-defstruct emigo-epc-manager 444 | "Root object that holds all information related to an EPC activity. 445 | 446 | `emigo-epc-start-epc' returns this object. 447 | 448 | title : instance name for displaying on the `emigo-epc-controller' UI 449 | server-process : process object for the peer 450 | commands : a list of (prog . args) 451 | port : port number 452 | connection : emigo-epc-connection instance 453 | methods : alist of method (name . function) 454 | sessions : alist of session (id . deferred) 455 | exit-hook : functions for after shutdown EPC connection" 456 | title server-process commands port connection methods sessions exit-hooks) 457 | 458 | (cl-defstruct emigo-epc-method 459 | "Object to hold serving method information. 460 | 461 | name : method name (symbol) ex: 'test 462 | task : method function (function with one argument) 463 | arg-specs : arg-specs (one string) ex: \"(A B C D)\" 464 | docstring : docstring (one string) ex: \"A test function. Return sum of A,B,C and D\" 465 | " 466 | name task docstring arg-specs) 467 | 468 | (defvar emigo-epc-live-connections nil 469 | "[internal] A list of `emigo-epc-manager' objects. 470 | those objects currently connect to the epc peer. 471 | This variable is for debug purpose.") 472 | 473 | (defun emigo-epc-server-process-name (uid) 474 | (format "emigo-epc-server:%s" uid)) 475 | 476 | (defun emigo-epc-server-buffer-name (uid) 477 | (format " *%s*" (emigo-epc-server-process-name uid))) 478 | 479 | (defun emigo-epc-stop-epc (mngr) 480 | "Disconnect the connection for the server." 481 | (let* ((proc (emigo-epc-manager-server-process mngr)) 482 | (buf (and proc (process-buffer proc)))) 483 | (emigo-epc-disconnect (emigo-epc-manager-connection mngr)) 484 | (when proc 485 | (accept-process-output proc 0 emigo-epc-accept-process-timeout t)) 486 | (when (and proc (equal 'run (process-status proc))) 487 | (kill-process proc)) 488 | (when buf (kill-buffer buf)) 489 | (setq emigo-epc-live-connections (delete mngr emigo-epc-live-connections)) 490 | )) 491 | 492 | (defun emigo-epc-args (args) 493 | "[internal] If ARGS is an atom, return it. If list, return the cadr of it." 494 | (cond 495 | ((atom args) args) 496 | (t (cadr args)))) 497 | 498 | (defun emigo-epc-init-epc-layer (mngr) 499 | "[internal] Connect to the server program and return an emigo-epc-connection instance." 500 | (let* ((mngr mngr) 501 | (conn (emigo-epc-manager-connection mngr)) 502 | (channel (emigo-epc-connection-channel conn))) 503 | ;; dispatch incoming messages with the lexical scope 504 | (cl-loop for (method . body) in 505 | `((call 506 | . (lambda (args) 507 | (emigo-epc-log "SIG CALL: %S" args) 508 | (apply 'emigo-epc-handler-called-method ,mngr (emigo-epc-args args)))) 509 | (return 510 | . (lambda (args) 511 | (emigo-epc-log "SIG RET: %S" args) 512 | (apply 'emigo-epc-handler-return ,mngr (emigo-epc-args args)))) 513 | (return-error 514 | . (lambda (args) 515 | (emigo-epc-log "SIG RET-ERROR: %S" args) 516 | (apply 'emigo-epc-handler-return-error ,mngr (emigo-epc-args args)))) 517 | (epc-error 518 | . (lambda (args) 519 | (emigo-epc-log "SIG EPC-ERROR: %S" args) 520 | (apply 'emigo-epc-handler-epc-error ,mngr (emigo-epc-args args)))) 521 | (methods 522 | . (lambda (args) 523 | (emigo-epc-log "SIG METHODS: %S" args) 524 | (emigo-epc-handler-methods ,mngr (caadr args)))) 525 | ) do 526 | (emigo-epc-signal-connect channel method body)) 527 | (push mngr emigo-epc-live-connections) 528 | mngr)) 529 | 530 | (defun emigo-epc-manager-send (mngr method &rest messages) 531 | "[internal] low-level message sending." 532 | (let* ((conn (emigo-epc-manager-connection mngr))) 533 | (emigo-epc-net-send conn (cons method messages)))) 534 | 535 | (defun emigo-epc-manager-get-method (mngr method-name) 536 | "[internal] Return a method object. If not found, return nil." 537 | (cl-loop for i in (emigo-epc-manager-methods mngr) 538 | if (eq method-name (emigo-epc-method-name i)) 539 | do (cl-return i))) 540 | 541 | (defun emigo-epc-handler-methods (mngr uid) 542 | "[internal] Return a list of information for registered methods." 543 | (let ((info 544 | (cl-loop for i in (emigo-epc-manager-methods mngr) 545 | collect 546 | (list 547 | (emigo-epc-method-name i) 548 | (or (emigo-epc-method-arg-specs i) "") 549 | (or (emigo-epc-method-docstring i) ""))))) 550 | (emigo-epc-manager-send mngr 'return uid info))) 551 | 552 | (defun emigo-epc-handler-called-method (mngr uid name args) 553 | "[internal] low-level message handler for peer's calling." 554 | (let ((mngr mngr) (uid uid)) 555 | (let* ((_methods (emigo-epc-manager-methods mngr)) 556 | (method (emigo-epc-manager-get-method mngr name))) 557 | (cond 558 | ((null method) 559 | (emigo-epc-log "ERR: No such method : %s" name) 560 | (emigo-epc-manager-send mngr 'epc-error uid (format "EPC-ERROR: No such method : %s" name))) 561 | (t 562 | (condition-case err 563 | (let* ((f (emigo-epc-method-task method)) 564 | (ret (apply f args))) 565 | (cond 566 | ((emigo-deferred-object-p ret) 567 | (emigo-deferred-nextc ret 568 | (lambda (xx) (emigo-epc-manager-send mngr 'return uid xx)))) 569 | (t (emigo-epc-manager-send mngr 'return uid ret)))) 570 | (error 571 | ;; Include method name and args in error for debugging 572 | (let ((err-msg (format "FAILED in %s: %S with ERROR: %S" name args err))) 573 | (emigo-epc-log err-msg) 574 | (emigo-epc-manager-send mngr 'return-error uid err-msg))))))))) 575 | 576 | (defun emigo-epc-manager-remove-session (mngr uid) 577 | "[internal] Remove a session from the epc manager object." 578 | (cl-loop with ret = nil 579 | for pair in (emigo-epc-manager-sessions mngr) 580 | unless (eq uid (car pair)) 581 | do (push pair ret) 582 | finally 583 | do (setf (emigo-epc-manager-sessions mngr) ret))) 584 | 585 | (defun emigo-epc-handler-return (mngr uid args) 586 | "[internal] low-level message handler for normal returns." 587 | (let ((pair (assq uid (emigo-epc-manager-sessions mngr)))) 588 | (cond 589 | (pair 590 | (emigo-epc-log "RET: id:%s [%S]" uid args) 591 | (emigo-epc-manager-remove-session mngr uid) 592 | (emigo-deferred-callback (cdr pair) args)) 593 | (t ; error 594 | (emigo-epc-log "RET: NOT FOUND: id:%s [%S]" uid args))))) 595 | 596 | (defun emigo-epc-handler-return-error (mngr uid args) 597 | "[internal] low-level message handler for application errors." 598 | (let ((pair (assq uid (emigo-epc-manager-sessions mngr))) 599 | (cond 600 | (pair 601 | (emigo-epc-log "RET-ERR: id:%s [%S]" uid args) 602 | (emigo-epc-manager-remove-session mngr uid) 603 | (let* ((err-str (format "%S" args)) 604 | ;; Add context about the failed call if available 605 | (when (and (listp args) (eq (car args) 'error)) 606 | (setq err-str (format "EPC call failed: %S" args))) 607 | (emigo-deferred-errorback (cdr pair) err-str)))) 608 | (t ; error 609 | (emigo-epc-log "RET-ERR: NOT FOUND: id:%s [%S]" uid args)))))) 610 | 611 | (defun emigo-epc-handler-epc-error (mngr uid args) 612 | "[internal] low-level message handler for epc errors." 613 | (let ((pair (assq uid (emigo-epc-manager-sessions mngr)))) 614 | (cond 615 | (pair 616 | (emigo-epc-log "RET-EPC-ERR: id:%s [%S]" uid args) 617 | (emigo-epc-manager-remove-session mngr uid) 618 | (emigo-deferred-errorback (cdr pair) (list 'epc-error args))) 619 | (t ; error 620 | (emigo-epc-log "RET-EPC-ERR: NOT FOUND: id:%s [%S]" uid args))))) 621 | 622 | (defun emigo-epc-call-deferred (mngr method-name args) 623 | "Call peer's method with args asynchronously. Return a deferred 624 | object which is called with the result." 625 | (let ((uid (emigo-epc-uid)) 626 | (sessions (emigo-epc-manager-sessions mngr)) 627 | (d (emigo-deferred-new))) 628 | (push (cons uid d) sessions) 629 | (setf (emigo-epc-manager-sessions mngr) sessions) 630 | (emigo-epc-manager-send mngr 'call uid method-name args) 631 | d)) 632 | 633 | (defun emigo-epc-define-method (mngr method-name task &optional arg-specs docstring) 634 | "Define a method and return a deferred object which is called by the peer." 635 | (let* ((method (make-emigo-epc-method 636 | :name method-name :task task 637 | :arg-specs arg-specs :docstring docstring)) 638 | (methods (cons method (emigo-epc-manager-methods mngr)))) 639 | (setf (emigo-epc-manager-methods mngr) methods) 640 | method)) 641 | 642 | (defun emigo-epc-sync (mngr d) 643 | "Wrap deferred methods with synchronous waiting, and return the result. 644 | If an exception is occurred, this function throws the error." 645 | (let ((result 'emigo-epc-nothing)) 646 | (emigo-deferred-chain 647 | d 648 | (emigo-deferred-nextc it 649 | (lambda (x) (setq result x))) 650 | (emigo-deferred-error it 651 | (lambda (er) (setq result (cons 'error er))))) 652 | (while (eq result 'emigo-epc-nothing) 653 | (save-current-buffer 654 | (accept-process-output 655 | (emigo-epc-connection-process (emigo-epc-manager-connection mngr)) 656 | 0 emigo-epc-accept-process-timeout t))) 657 | (if (and (consp result) (eq 'error (car result))) 658 | (error (cdr result)) result))) 659 | 660 | (defun emigo-epc-call-sync (mngr method-name args) 661 | "Call peer's method with args synchronously and return the result. 662 | If an exception is occurred, this function throws the error." 663 | (emigo-epc-sync mngr (emigo-epc-call-deferred mngr method-name args))) 664 | 665 | (defun emigo-epc-live-p (mngr) 666 | "Return non-nil when MNGR is an EPC manager object with a live 667 | connection." 668 | (let ((proc (ignore-errors 669 | (emigo-epc-connection-process (emigo-epc-manager-connection mngr))))) 670 | (and (processp proc) 671 | ;; Same as `process-live-p' in Emacs >= 24: 672 | (memq (process-status proc) '(run open listen connect stop))))) 673 | 674 | ;; epcs 675 | (defvar emigo-epc-server-client-processes nil 676 | "[internal] A list of ([process object] . [`emigo-epc-manager' instance]). 677 | When the server process accepts the client connection, the 678 | `emigo-epc-manager' instance is created and stored in this variable 679 | `emigo-epc-server-client-processes'. This variable is used for the management 680 | purpose.") 681 | 682 | ;; emigo-epc-server 683 | ;; name : process name (string) ex: "EPC Server 1" 684 | ;; process : server process object 685 | ;; port : port number 686 | ;; connect-function : initialize function for `emigo-epc-manager' instances 687 | (cl-defstruct emigo-epc-server name process port connect-function) 688 | 689 | (defvar emigo-epc-server-processes nil 690 | "[internal] A list of ([process object] . [`emigo-epc-server' instance]). 691 | This variable is used for the management purpose.") 692 | 693 | (defun emigo-epc-server-get-manager-by-process (proc) 694 | "[internal] Return the emigo-epc-manager instance for the PROC." 695 | (cl-loop for (pp . mngr) in emigo-epc-server-client-processes 696 | if (eql pp proc) 697 | do (cl-return mngr) 698 | finally return nil)) 699 | 700 | (defun emigo-epc-server-accept (process) 701 | "[internal] Initialize the process and return emigo-epc-manager object." 702 | (emigo-epc-log "EMIGO-EPC-SERVER- >> Connection accept: %S" process) 703 | (let* ((connection-id (emigo-epc-uid)) 704 | (connection-name (format "emigo-epc con %s" connection-id)) 705 | (channel (list connection-name nil)) 706 | (connection (make-emigo-epc-connection 707 | :name connection-name 708 | :process process 709 | :buffer (process-buffer process) 710 | :channel channel))) 711 | (emigo-epc-log "EMIGO-EPC-SERVER- >> Connection establish") 712 | (set-process-coding-system process 'binary 'binary) 713 | (set-process-filter process 714 | (lambda (p m) 715 | (emigo-epc-process-filter connection p m))) 716 | (set-process-query-on-exit-flag process nil) 717 | (set-process-sentinel process 718 | (lambda (p e) 719 | (emigo-epc-process-sentinel connection p e))) 720 | (make-emigo-epc-manager :server-process process :port t 721 | :connection connection))) 722 | 723 | (defun emigo-epc-server-sentinel (process message connect-function) 724 | "[internal] Process sentinel handler for the server process." 725 | (emigo-epc-log "EMIGO-EPC-SERVER- SENTINEL: %S %S" process message) 726 | (let ((mngr (emigo-epc-server-get-manager-by-process process))) 727 | (cond 728 | ;; new connection 729 | ((and (string-match "open" message) (null mngr)) 730 | (condition-case err 731 | (let ((mngr (emigo-epc-server-accept process))) 732 | (push (cons process mngr) emigo-epc-server-client-processes) 733 | (emigo-epc-init-epc-layer mngr) 734 | (when connect-function (funcall connect-function mngr)) 735 | mngr) 736 | ('error 737 | (emigo-epc-log "EMIGO-EPC-SERVER- Protocol error: %S" err) 738 | (emigo-epc-log "EMIGO-EPC-SERVER- ABORT %S" process) 739 | (delete-process process)))) 740 | ;; ignore 741 | ((null mngr) nil ) 742 | ;; disconnect 743 | (t 744 | (let ((pair (assq process emigo-epc-server-client-processes)) _d) 745 | (when pair 746 | (emigo-epc-log "EMIGO-EPC-SERVER- DISCONNECT %S" process) 747 | (emigo-epc-stop-epc (cdr pair)) 748 | (setq emigo-epc-server-client-processes 749 | (assq-delete-all process emigo-epc-server-client-processes)) 750 | )) 751 | nil)))) 752 | 753 | (defun emigo-epc-server-start (connect-function &optional port) 754 | "Start TCP Server and return the main process object." 755 | (let* 756 | ((connect-function connect-function) 757 | (name (format "EMIGO EPC Server %s" (emigo-epc-uid))) 758 | (buf (emigo-epc-make-procbuf (format " *%s*" name))) 759 | (main-process 760 | (make-network-process 761 | :name name 762 | :buffer buf 763 | :family 'ipv4 764 | :server t 765 | :host "127.0.0.1" 766 | :service (or port t) 767 | :noquery t 768 | :sentinel 769 | (lambda (process message) 770 | (emigo-epc-server-sentinel process message connect-function))))) 771 | (push (cons main-process 772 | (make-emigo-epc-server 773 | :name name :process main-process 774 | :port (process-contact main-process :service) 775 | :connect-function connect-function)) 776 | emigo-epc-server-processes) 777 | main-process)) 778 | 779 | (provide 'emigo-epc) 780 | ;;; emigo-epc.el ends here 781 | --------------------------------------------------------------------------------