├── Emigo.png
├── requirements.txt
├── queries
├── tree-sitter-languages
│ ├── elisp-tags.scm
│ ├── python-tags.scm
│ ├── c-tags.scm
│ ├── java-tags.scm
│ ├── kotlin-tags.scm
│ ├── ql-tags.scm
│ ├── php-tags.scm
│ ├── cpp-tags.scm
│ ├── go-tags.scm
│ ├── elm-tags.scm
│ ├── c_sharp-tags.scm
│ ├── typescript-tags.scm
│ ├── ruby-tags.scm
│ ├── rust-tags.scm
│ ├── elixir-tags.scm
│ ├── hcl-tags.scm
│ ├── README.md
│ ├── javascript-tags.scm
│ ├── dart-tags.scm
│ └── ocaml-tags.scm
└── tree-sitter-language-pack
│ └── javascript-tags.scm
├── .github
└── workflows
│ └── doctor-pr.yml
├── todo.md
├── .gitignore
├── REPLIT_SETUP_COMPLETE.md
├── llm_providers.py
├── replit.md
├── test_setup.py
├── README.md
├── README_REPLIT.md
├── config.py
├── agent.py
├── utils.py
├── LICENSE
├── tool_definitions.py
├── llm.py
├── session.py
├── system_prompt.py
├── tools.py
└── emigo-epc.el
/Emigo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthewZMD/emigo/HEAD/Emigo.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | epc
2 | networkx
3 | pygments
4 | grep-ast
5 | diskcache
6 | tiktoken
7 | tqdm
8 | gitignore_parser
9 | scipy
10 | litellm
11 | orjson
12 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/elisp-tags.scm:
--------------------------------------------------------------------------------
1 | ;; defun/defsubst
2 | (function_definition name: (symbol) @name.definition.function) @definition.function
3 |
4 | ;; Treat macros as function definitions for the sake of TAGS.
5 | (macro_definition name: (symbol) @name.definition.function) @definition.function
6 |
7 | ;; Match function calls
8 | (list (symbol) @name.reference.function) @reference.function
9 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/python-tags.scm:
--------------------------------------------------------------------------------
1 | (class_definition ; class definitions, tagged by their identifier
2 | name: (identifier) @name.definition.class) @definition.class
3 | ; Function definitions, tagged by their identifier.
4 | (function_definition
5 | name: (identifier) @name.definition.function) @definition.function
6 | ; Calls: the callee name is either a bare identifier or the attribute part of an attribute access.
7 | (call
8 | function: [
9 | (identifier) @name.reference.call
10 | (attribute
11 | attribute: (identifier) @name.reference.call)
12 | ]) @reference.call
13 |
--------------------------------------------------------------------------------
/.github/workflows/doctor-pr.yml:
--------------------------------------------------------------------------------
1 | name: Doctor PR # runs the Doctor-PR action on demand
2 | on:
3 | workflow_dispatch: # the only trigger: a manual dispatch
4 | inputs:
5 | action_input: # required string forwarded to the action unchanged
6 | required: true
7 | type: string
8 | jobs:
9 | doctor-pr:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Doctor PR
13 | uses: Doctor-PR/action@latest # NOTE(review): "latest" is a named ref, not a commit SHA; consider pinning since a secret is passed below
14 | with:
15 | action_input: ${{inputs.action_input}}
16 | anthropic_api_key: ${{secrets.ANTHROPIC_API_KEY}}
17 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/c-tags.scm:
--------------------------------------------------------------------------------
1 | (struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class
2 | ; Unions are tagged as classes when they appear as the type of a declaration.
3 | (declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class
4 | ; Functions whose declarator is a plain identifier.
5 | (function_declarator declarator: (identifier) @name.definition.function) @definition.function
6 | ; Names introduced by a type_definition.
7 | (type_definition declarator: (type_identifier) @name.definition.type) @definition.type
8 | ; Named enums are tagged as types.
9 | (enum_specifier name: (type_identifier) @name.definition.type) @definition.type
10 |
--------------------------------------------------------------------------------
/todo.md:
--------------------------------------------------------------------------------
1 | 1. 是否要对 AI 输出的代码也做语法高亮? 我感觉不需要吧, diff 高亮就可以了, 如果没有输出 diff, 基本上也没啥用
2 | 2. 怎么根据 AI 输出生成 diff files 列表? Aidermacs 代码搬运过来? 每个项目都要按照文件粒度缓存补丁
3 | 3. diff review 的界面: 左边铺满, 左边上面分别是 "全部文件、文件 A、文件 B", 左边下面是 "全部文件的 hunks, 文件 A 的 hunks, 文件 B 的 hunks", 支持整个文件 apply/cancel 和 hunk 的 apply/cancel
4 | 4. 右侧栏应该显示所有 session 的状态,方便用户知道 AI 干完活以后,手动切换 session
5 | 5. 研究 Cursor 的提示词, 看看能否用 RAG 的方式来增强 aider tree-sitter 这种 repomap 的方式? 我总感觉 Cursor 的那种模式要高级一点, aider 适合自己的项目精确重构, Cursor 适应范围要广很多
6 | 6. 可以随时更改过去的某个 prompt,然后重新发给 LLM, 执行这个命令的时候, 建议临时取消 read-only 后, 编辑后重新发送
7 | 7. 动态切换 AI Model
8 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/java-tags.scm:
--------------------------------------------------------------------------------
1 | (class_declaration ; class definitions
2 | name: (identifier) @name.definition.class) @definition.class
3 | ; Method definitions.
4 | (method_declaration
5 | name: (identifier) @name.definition.method) @definition.method
6 | ; Method calls. Note: @reference.call is attached to the argument_list node here, not to the enclosing method_invocation.
7 | (method_invocation
8 | name: (identifier) @name.reference.call
9 | arguments: (argument_list) @reference.call)
10 | ; Interface definitions.
11 | (interface_declaration
12 | name: (identifier) @name.definition.interface) @definition.interface
13 | ; Each type_identifier inside a type_list is tagged as an implementation reference.
14 | (type_list
15 | (type_identifier) @name.reference.implementation) @reference.implementation
16 | ; Object creation expressions reference the class named by their type field.
17 | (object_creation_expression
18 | type: (type_identifier) @name.reference.class) @reference.class
19 | ; The type named inside a superclass node is a class reference.
20 | (superclass (type_identifier) @name.reference.class) @reference.class
21 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/kotlin-tags.scm:
--------------------------------------------------------------------------------
1 | ; Definitions
2 |
3 | (class_declaration
4 | (type_identifier) @name.definition.class) @definition.class
5 |
6 | (function_declaration
7 | (simple_identifier) @name.definition.function) @definition.function
8 |
9 | (object_declaration
10 | (type_identifier) @name.definition.object) @definition.object
11 |
12 | ; References
13 |
14 | (call_expression
15 | [
16 | (simple_identifier) @name.reference.call
17 | (navigation_expression
18 | (navigation_suffix
19 | (simple_identifier) @name.reference.call))
20 | ]) @reference.call
21 |
22 | (delegation_specifier
23 | [
24 | (user_type) @name.reference.type
25 | (constructor_invocation
26 | (user_type) @name.reference.type)
27 | ]) @reference.type
28 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.so
6 | .Python
7 | build/
8 | develop-eggs/
9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | pip-wheel-metadata/
20 | share/python-wheels/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | MANIFEST
25 |
26 | # Virtual environments
27 | .env
28 | .venv
29 | env/
30 | venv/
31 | ENV/
32 | env.bak/
33 | venv.bak/
34 |
35 | # IDE
36 | .vscode/
37 | .idea/
38 | *.swp
39 | *.swo
40 | *~
41 |
42 | # Emacs
43 | \#*\#
44 | .\#*
45 | *.elc
46 | auto-save-list
47 | tramp
48 | .\#*
49 |
50 | # Cache
51 | .cache/
52 | .pythonlibs/
53 | *.cache
54 |
55 | # Emigo specific
56 | .emigo_repomap/
57 |
58 | # Replit
59 | .replit
60 | .upm/
61 | .config/
62 | replit.nix
63 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/ql-tags.scm:
--------------------------------------------------------------------------------
1 | (classlessPredicate ; classlessPredicate nodes are tagged as function definitions
2 | name: (predicateName) @name.definition.function) @definition.function
3 | ; memberPredicate nodes are tagged as method definitions.
4 | (memberPredicate
5 | name: (predicateName) @name.definition.method) @definition.method
6 | ; aritylessPredicateExpr nodes are tagged as call references, named by their literalId.
7 | (aritylessPredicateExpr
8 | name: (literalId) @name.reference.call) @reference.call
9 | ; Module definitions.
10 | (module
11 | name: (moduleName) @name.definition.module) @definition.module
12 | ; dataclass, datatype and datatypeBranch are all tagged as class definitions.
13 | (dataclass
14 | name: (className) @name.definition.class) @definition.class
15 |
16 | (datatype
17 | name: (className) @name.definition.class) @definition.class
18 |
19 | (datatypeBranch
20 | name: (className) @name.definition.class) @definition.class
21 | ; Predicate names inside a qualifiedRhs are call references.
22 | (qualifiedRhs
23 | name: (predicateName) @name.reference.call) @reference.call
24 | ; Class names inside a typeExpr are type references.
25 | (typeExpr
26 | name: (className) @name.reference.type) @reference.type
27 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/php-tags.scm:
--------------------------------------------------------------------------------
1 | (class_declaration ; class definitions
2 | name: (name) @name.definition.class) @definition.class
3 | ; function_definition and method_declaration are both tagged as function definitions.
4 | (function_definition
5 | name: (name) @name.definition.function) @definition.function
6 |
7 | (method_declaration
8 | name: (name) @name.definition.function) @definition.function
9 | ; Object creation: the class is named through a qualified_name or a variable_name.
10 | (object_creation_expression
11 | [
12 | (qualified_name (name) @name.reference.class)
13 | (variable_name (name) @name.reference.class)
14 | ]) @reference.class
15 | ; Function calls. Note: the variable_name branch captures the whole variable_name node, whereas the object-creation pattern captures the inner name.
16 | (function_call_expression
17 | function: [
18 | (qualified_name (name) @name.reference.call)
19 | (variable_name (name)) @name.reference.call
20 | ]) @reference.call
21 | ; Scoped call expressions, tagged by their name field.
22 | (scoped_call_expression
23 | name: (name) @name.reference.call) @reference.call
24 | ; Member call expressions, tagged by their name field.
25 | (member_call_expression
26 | name: (name) @name.reference.call) @reference.call
27 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/cpp-tags.scm:
--------------------------------------------------------------------------------
1 | (struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class
2 | ; Unions are tagged as classes when they appear as the type of a declaration.
3 | (declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class
4 | ; Functions whose declarator is a plain identifier.
5 | (function_declarator declarator: (identifier) @name.definition.function) @definition.function
6 | ; Functions whose declarator is a field_identifier.
7 | (function_declarator declarator: (field_identifier) @name.definition.function) @definition.function
8 | ; Functions declared with a namespace-qualified name are tagged as methods; the namespace is captured as @scope.
9 | (function_declarator declarator: (qualified_identifier scope: (namespace_identifier) @scope name: (identifier) @name.definition.method)) @definition.method
10 | ; Names introduced by a type_definition.
11 | (type_definition declarator: (type_identifier) @name.definition.type) @definition.type
12 | ; Named enums are tagged as types.
13 | (enum_specifier name: (type_identifier) @name.definition.type) @definition.type
14 | ; Classes.
15 | (class_specifier name: (type_identifier) @name.definition.class) @definition.class
16 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/go-tags.scm:
--------------------------------------------------------------------------------
1 | ( ; function declarations; comments immediately preceding the declaration are captured as @doc
2 | (comment)* @doc
3 | .
4 | (function_declaration
5 | name: (identifier) @name.definition.function) @definition.function
6 | (#strip! @doc "^//\\s*")
7 | (#select-adjacent! @doc @definition.function)
8 | )
9 | ; Method declarations, handled the same way as functions.
10 | (
11 | (comment)* @doc
12 | .
13 | (method_declaration
14 | name: (field_identifier) @name.definition.method) @definition.method
15 | (#strip! @doc "^//\\s*")
16 | (#select-adjacent! @doc @definition.method)
17 | )
18 | ; Calls: the callee is an identifier or a selector field, optionally wrapped in parentheses.
19 | (call_expression
20 | function: [
21 | (identifier) @name.reference.call
22 | (parenthesized_expression (identifier) @name.reference.call)
23 | (selector_expression field: (field_identifier) @name.reference.call)
24 | (parenthesized_expression (selector_expression field: (field_identifier) @name.reference.call))
25 | ]) @reference.call
26 | ; Type definitions.
27 | (type_spec
28 | name: (type_identifier) @name.definition.type) @definition.type
29 | ; Every type_identifier is also tagged as a type reference.
30 | (type_identifier) @name.reference.type @reference.type
31 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/elm-tags.scm:
--------------------------------------------------------------------------------
1 | (value_declaration (function_declaration_left (lower_case_identifier) @name.definition.function)) @definition.function
2 | ; Function references: call expressions, exposed values, and names in type annotations.
3 | (function_call_expr (value_expr (value_qid) @name.reference.function)) @reference.function
4 | (exposed_value (lower_case_identifier) @name.reference.function) @reference.function
5 | (type_annotation ((lower_case_identifier) @name.reference.function) (colon)) @reference.function
6 | ; Type declarations.
7 | (type_declaration ((upper_case_identifier) @name.definition.type)) @definition.type
8 | ; Type references: type_ref nodes and exposed types.
9 | (type_ref (upper_case_qid (upper_case_identifier) @name.reference.type)) @reference.type
10 | (exposed_type (upper_case_identifier) @name.reference.type) @reference.type
11 | ; Union variants declared inside a type declaration.
12 | (type_declaration (union_variant (upper_case_identifier) @name.definition.union)) @definition.union
13 | ; Upper-case identifiers inside value expressions are tagged as union references.
14 | (value_expr (upper_case_qid (upper_case_identifier) @name.reference.union)) @reference.union
15 |
16 | ; Module declaration; the whole upper_case_qid is captured as the module name.
17 | (module_declaration
18 | (upper_case_qid (upper_case_identifier)) @name.definition.module
19 | ) @definition.module
20 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/c_sharp-tags.scm:
--------------------------------------------------------------------------------
1 | (class_declaration ; class definitions
2 | name: (identifier) @name.definition.class
3 | ) @definition.class
4 | ; Every entry in a class's base_list is tagged as a class reference.
5 | (class_declaration
6 | bases: (base_list (_) @name.reference.class)
7 | ) @reference.class
8 | ; Interface definitions.
9 | (interface_declaration
10 | name: (identifier) @name.definition.interface
11 | ) @definition.interface
12 | ; Every entry in an interface's base_list is tagged as an interface reference.
13 | (interface_declaration
14 | bases: (base_list (_) @name.reference.interface)
15 | ) @reference.interface
16 | ; Method definitions.
17 | (method_declaration
18 | name: (identifier) @name.definition.method
19 | ) @definition.method
20 | ; Object creation references the created type (identifier types only).
21 | (object_creation_expression
22 | type: (identifier) @name.reference.class
23 | ) @reference.class
24 | ; Targets of type-parameter constraint clauses.
25 | (type_parameter_constraints_clause
26 | target: (identifier) @name.reference.class
27 | ) @reference.class
28 | ; Types named in type constraints.
29 | (type_constraint
30 | type: (identifier) @name.reference.class
31 | ) @reference.class
32 | ; Types of declared variables (identifier types only).
33 | (variable_declaration
34 | type: (identifier) @name.reference.class
35 | ) @reference.class
36 | ; Invocations whose callee is a member_access_expression are tagged as "send" references.
37 | (invocation_expression
38 | function:
39 | (member_access_expression
40 | name: (identifier) @name.reference.send
41 | )
42 | ) @reference.send
43 | ; Namespaces are tagged as module definitions.
44 | (namespace_declaration
45 | name: (identifier) @name.definition.module
46 | ) @definition.module
47 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/typescript-tags.scm:
--------------------------------------------------------------------------------
1 | (function_signature ; function signatures
2 | name: (identifier) @name.definition.function) @definition.function
3 | ; Method signatures.
4 | (method_signature
5 | name: (property_identifier) @name.definition.method) @definition.method
6 | ; Abstract method signatures.
7 | (abstract_method_signature
8 | name: (property_identifier) @name.definition.method) @definition.method
9 | ; Abstract classes.
10 | (abstract_class_declaration
11 | name: (type_identifier) @name.definition.class) @definition.class
12 | ; Modules.
13 | (module
14 | name: (identifier) @name.definition.module) @definition.module
15 | ; Interfaces (a later pattern in this file also tags them as classes).
16 | (interface_declaration
17 | name: (type_identifier) @name.definition.interface) @definition.interface
18 | ; Type identifiers inside type annotations are type references.
19 | (type_annotation
20 | (type_identifier) @name.reference.type) @reference.type
21 | ; new-expressions whose constructor is an identifier are class references.
22 | (new_expression
23 | constructor: (identifier) @name.reference.class) @reference.class
24 | ; Function declarations.
25 | (function_declaration
26 | name: (identifier) @name.definition.function) @definition.function
27 | ; Method definitions.
28 | (method_definition
29 | name: (property_identifier) @name.definition.method) @definition.method
30 | ; Class declarations.
31 | (class_declaration
32 | name: (type_identifier) @name.definition.class) @definition.class
33 | ; Interfaces again, this time tagged as class definitions, so each interface matches two definition patterns.
34 | (interface_declaration
35 | name: (type_identifier) @name.definition.class) @definition.class
36 | ; Type aliases.
37 | (type_alias_declaration
38 | name: (type_identifier) @name.definition.type) @definition.type
39 | ; Enums.
40 | (enum_declaration
41 | name: (identifier) @name.definition.enum) @definition.enum
42 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/ruby-tags.scm:
--------------------------------------------------------------------------------
1 | ; Method definitions
2 |
3 | (
4 | (comment)* @doc
5 | .
6 | [
7 | (method
8 | name: (_) @name.definition.method) @definition.method
9 | (singleton_method
10 | name: (_) @name.definition.method) @definition.method
11 | ]
12 | (#strip! @doc "^#\\s*")
13 | (#select-adjacent! @doc @definition.method)
14 | )
15 |
16 | (alias
17 | name: (_) @name.definition.method) @definition.method
18 |
19 | (setter
20 | (identifier) @ignore)
21 |
22 | ; Class definitions
23 |
24 | (
25 | (comment)* @doc
26 | .
27 | [
28 | (class
29 | name: [
30 | (constant) @name.definition.class
31 | (scope_resolution
32 | name: (_) @name.definition.class)
33 | ]) @definition.class
34 | (singleton_class
35 | value: [
36 | (constant) @name.definition.class
37 | (scope_resolution
38 | name: (_) @name.definition.class)
39 | ]) @definition.class
40 | ]
41 | (#strip! @doc "^#\\s*")
42 | (#select-adjacent! @doc @definition.class)
43 | )
44 |
45 | ; Module definitions
46 |
47 | (
48 | (module
49 | name: [
50 | (constant) @name.definition.module
51 | (scope_resolution
52 | name: (_) @name.definition.module)
53 | ]) @definition.module
54 | )
55 |
56 | ; Calls
57 |
58 | (call method: (identifier) @name.reference.call) @reference.call
59 |
60 | (
61 | [(identifier) (constant)] @name.reference.call @reference.call
62 | (#is-not? local)
63 | (#not-match? @name.reference.call "^(lambda|load|require|require_relative|__FILE__|__LINE__)$")
64 | )
65 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/rust-tags.scm:
--------------------------------------------------------------------------------
1 | ; ADT definitions
2 |
3 | (struct_item
4 | name: (type_identifier) @name.definition.class) @definition.class
5 |
6 | (enum_item
7 | name: (type_identifier) @name.definition.class) @definition.class
8 |
9 | (union_item
10 | name: (type_identifier) @name.definition.class) @definition.class
11 |
12 | ; type aliases
13 |
14 | (type_item
15 | name: (type_identifier) @name.definition.class) @definition.class
16 |
17 | ; method definitions
18 |
19 | (declaration_list
20 | (function_item
21 | name: (identifier) @name.definition.method)) @definition.method
22 |
23 | ; function definitions
24 |
25 | (function_item
26 | name: (identifier) @name.definition.function) @definition.function
27 |
28 | ; trait definitions
29 | (trait_item
30 | name: (type_identifier) @name.definition.interface) @definition.interface
31 |
32 | ; module definitions
33 | (mod_item
34 | name: (identifier) @name.definition.module) @definition.module
35 |
36 | ; macro definitions
37 |
38 | (macro_definition
39 | name: (identifier) @name.definition.macro) @definition.macro
40 |
41 | ; references
42 |
43 | (call_expression
44 | function: (identifier) @name.reference.call) @reference.call
45 |
46 | (call_expression
47 | function: (field_expression
48 | field: (field_identifier) @name.reference.call)) @reference.call
49 |
50 | (macro_invocation
51 | macro: (identifier) @name.reference.call) @reference.call
52 |
53 | ; implementations
54 |
55 | (impl_item
56 | trait: (type_identifier) @name.reference.implementation) @reference.implementation
57 |
58 | (impl_item
59 | type: (type_identifier) @name.reference.implementation
60 | !trait) @reference.implementation
61 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/elixir-tags.scm:
--------------------------------------------------------------------------------
1 | ; Definitions
2 |
3 | ; * modules and protocols
4 | (call
5 | target: (identifier) @ignore
6 | (arguments (alias) @name.definition.module)
7 | (#match? @ignore "^(defmodule|defprotocol)$")) @definition.module
8 |
9 | ; * functions/macros
10 | (call
11 | target: (identifier) @ignore
12 | (arguments
13 | [
14 | ; zero-arity functions with no parentheses
15 | (identifier) @name.definition.function
16 | ; regular function clause
17 | (call target: (identifier) @name.definition.function)
18 | ; function clause with a guard clause
19 | (binary_operator
20 | left: (call target: (identifier) @name.definition.function)
21 | operator: "when")
22 | ])
23 | (#match? @ignore "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @definition.function
24 |
25 | ; References
26 |
27 | ; ignore calls to kernel/special-forms keywords
28 | (call
29 | target: (identifier) @ignore
30 | (#match? @ignore "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp|defmodule|defprotocol|defimpl|defstruct|defexception|defoverridable|alias|case|cond|else|for|if|import|quote|raise|receive|require|reraise|super|throw|try|unless|unquote|unquote_splicing|use|with)$"))
31 |
32 | ; ignore module attributes
33 | (unary_operator
34 | operator: "@"
35 | operand: (call
36 | target: (identifier) @ignore))
37 |
38 | ; * function call
39 | (call
40 | target: [
41 | ; local
42 | (identifier) @name.reference.call
43 | ; remote
44 | (dot
45 | right: (identifier) @name.reference.call)
46 | ]) @reference.call
47 |
48 | ; * pipe into function call
49 | (binary_operator
50 | operator: "|>"
51 | right: (identifier) @name.reference.call) @reference.call
52 |
53 | ; * modules
54 | (alias) @name.reference.module @reference.module
55 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/hcl-tags.scm:
--------------------------------------------------------------------------------
1 | ;; Based on https://github.com/tree-sitter-grammars/tree-sitter-hcl/blob/main/make_grammar.js
2 | ;; Which has Apache 2.0 License
3 | ;; tags.scm for Terraform (tree-sitter-hcl)
4 | ;; NOTE: every predicate sits inside the outermost parentheses of the pattern it filters; a predicate written after the closing parenthesis is not attached to that pattern.
5 | ; === Definitions: Terraform Blocks ===
6 | ((block
7 | (identifier) @block_type
8 | (string_lit (template_literal) @resource_type)
9 | (string_lit (template_literal) @name.definition.resource)
10 | (body) @definition.resource
11 | ) (#eq? @block_type "resource"))
12 |
13 | ((block
14 | (identifier) @block_type
15 | (string_lit (template_literal) @name.definition.module)
16 | (body) @definition.module
17 | ) (#eq? @block_type "module"))
18 |
19 | ((block
20 | (identifier) @block_type
21 | (string_lit (template_literal) @name.definition.variable)
22 | (body) @definition.variable
23 | ) (#eq? @block_type "variable"))
24 |
25 | ((block
26 | (identifier) @block_type
27 | (string_lit (template_literal) @name.definition.output)
28 | (body) @definition.output
29 | ) (#eq? @block_type "output"))
30 |
31 | ((block
32 | (identifier) @block_type
33 | (string_lit (template_literal) @name.definition.provider)
34 | (body) @definition.provider
35 | ) (#eq? @block_type "provider"))
36 | ; Each attribute inside a "locals" block defines one local.
37 | ((block
38 | (identifier) @block_type
39 | (body
40 | (attribute
41 | (identifier) @name.definition.local
42 | (expression) @definition.local
43 | )+
44 | )
45 | ) (#eq? @block_type "locals"))
46 |
47 | ; === References: Variables, Locals, Modules, Data, Resources ===
48 | (((variable_expr) @ref_type
49 | (get_attr (identifier) @name.reference.variable)
50 | ) @reference.variable
51 | (#eq? @ref_type "var"))
52 |
53 | (((variable_expr) @ref_type
54 | (get_attr (identifier) @name.reference.local)
55 | ) @reference.local
56 | (#eq? @ref_type "local"))
57 |
58 | (((variable_expr) @ref_type
59 | (get_attr (identifier) @name.reference.module)
60 | ) @reference.module
61 | (#eq? @ref_type "module"))
62 | ; data.<type>.<name>: the first get_attr is the data source type, the second is the referenced name.
63 | (((variable_expr) @ref_type
64 | (get_attr (identifier) @data_source_type)
65 | (get_attr (identifier) @name.reference.data)
66 | ) @reference.data
67 | (#eq? @ref_type "data"))
68 | ; Any other root name is treated as a resource reference.
69 | (((variable_expr) @resource_type
70 | (get_attr (identifier) @name.reference.resource)
71 | ) @reference.resource
72 | (#not-eq? @resource_type "var")
73 | (#not-eq? @resource_type "local")
74 | (#not-eq? @resource_type "module")
75 | (#not-eq? @resource_type "data")
76 | (#not-eq? @resource_type "provider")
77 | (#not-eq? @resource_type "output"))
78 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Credits
3 |
4 | Aider uses modified versions of the tags.scm files from these open source
5 | tree-sitter language implementations:
6 |
7 | * [https://github.com/tree-sitter/tree-sitter-c](https://github.com/tree-sitter/tree-sitter-c) — licensed under the MIT License.
8 | * [https://github.com/tree-sitter/tree-sitter-c-sharp](https://github.com/tree-sitter/tree-sitter-c-sharp) — licensed under the MIT License.
9 | * [https://github.com/tree-sitter/tree-sitter-cpp](https://github.com/tree-sitter/tree-sitter-cpp) — licensed under the MIT License.
10 | * [https://github.com/Wilfred/tree-sitter-elisp](https://github.com/Wilfred/tree-sitter-elisp) — licensed under the MIT License.
11 | * [https://github.com/elixir-lang/tree-sitter-elixir](https://github.com/elixir-lang/tree-sitter-elixir) — licensed under the Apache License, Version 2.0.
12 | * [https://github.com/elm-tooling/tree-sitter-elm](https://github.com/elm-tooling/tree-sitter-elm) — licensed under the MIT License.
13 | * [https://github.com/tree-sitter/tree-sitter-go](https://github.com/tree-sitter/tree-sitter-go) — licensed under the MIT License.
14 | * [https://github.com/tree-sitter/tree-sitter-java](https://github.com/tree-sitter/tree-sitter-java) — licensed under the MIT License.
15 | * [https://github.com/tree-sitter/tree-sitter-javascript](https://github.com/tree-sitter/tree-sitter-javascript) — licensed under the MIT License.
16 | * [https://github.com/tree-sitter/tree-sitter-ocaml](https://github.com/tree-sitter/tree-sitter-ocaml) — licensed under the MIT License.
17 | * [https://github.com/tree-sitter/tree-sitter-php](https://github.com/tree-sitter/tree-sitter-php) — licensed under the MIT License.
18 | * [https://github.com/tree-sitter/tree-sitter-python](https://github.com/tree-sitter/tree-sitter-python) — licensed under the MIT License.
19 | * [https://github.com/tree-sitter/tree-sitter-ql](https://github.com/tree-sitter/tree-sitter-ql) — licensed under the MIT License.
20 | * [https://github.com/r-lib/tree-sitter-r](https://github.com/r-lib/tree-sitter-r) — licensed under the MIT License.
21 | * [https://github.com/tree-sitter/tree-sitter-ruby](https://github.com/tree-sitter/tree-sitter-ruby) — licensed under the MIT License.
22 | * [https://github.com/tree-sitter/tree-sitter-rust](https://github.com/tree-sitter/tree-sitter-rust) — licensed under the MIT License.
23 | * [https://github.com/tree-sitter/tree-sitter-typescript](https://github.com/tree-sitter/tree-sitter-typescript) — licensed under the MIT License.
24 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/javascript-tags.scm:
--------------------------------------------------------------------------------
1 | ( ; class methods other than "constructor"; comments immediately preceding them are captured as @doc
2 | (comment)* @doc
3 | .
4 | (method_definition
5 | name: (property_identifier) @name.definition.method) @definition.method
6 | (#not-eq? @name.definition.method "constructor")
7 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
8 | (#select-adjacent! @doc @definition.method)
9 | )
10 | ; Class expressions and class declarations.
11 | (
12 | (comment)* @doc
13 | .
14 | [
15 | (class
16 | name: (_) @name.definition.class)
17 | (class_declaration
18 | name: (_) @name.definition.class)
19 | ] @definition.class
20 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
21 | (#select-adjacent! @doc @definition.class)
22 | )
23 | ; Named functions and generator functions, in expression or declaration form.
24 | (
25 | (comment)* @doc
26 | .
27 | [
28 | (function
29 | name: (identifier) @name.definition.function)
30 | (function_declaration
31 | name: (identifier) @name.definition.function)
32 | (generator_function
33 | name: (identifier) @name.definition.function)
34 | (generator_function_declaration
35 | name: (identifier) @name.definition.function)
36 | ] @definition.function
37 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
38 | (#select-adjacent! @doc @definition.function)
39 | )
40 | ; lexical_declaration bindings whose value is an arrow function or a function.
41 | (
42 | (comment)* @doc
43 | .
44 | (lexical_declaration
45 | (variable_declarator
46 | name: (identifier) @name.definition.function
47 | value: [(arrow_function) (function)]) @definition.function)
48 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
49 | (#select-adjacent! @doc @definition.function)
50 | )
51 | ; Same as above, for variable_declaration bindings.
52 | (
53 | (comment)* @doc
54 | .
55 | (variable_declaration
56 | (variable_declarator
57 | name: (identifier) @name.definition.function
58 | value: [(arrow_function) (function)]) @definition.function)
59 | (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
60 | (#select-adjacent! @doc @definition.function)
61 | )
62 | ; Assignments of a function to an identifier or to a member property.
63 | (assignment_expression
64 | left: [
65 | (identifier) @name.definition.function
66 | (member_expression
67 | property: (property_identifier) @name.definition.function)
68 | ]
69 | right: [(arrow_function) (function)]
70 | ) @definition.function
71 | ; Key/value pairs whose value is a function.
72 | (pair
73 | key: (property_identifier) @name.definition.function
74 | value: [(arrow_function) (function)]) @definition.function
75 | ; Calls to a plain identifier, excluding "require".
76 | (
77 | (call_expression
78 | function: (identifier) @name.reference.call) @reference.call
79 | (#not-match? @name.reference.call "^(require)$")
80 | )
81 | ; Calls through a member expression. Note: @reference.call is attached to the arguments node here.
82 | (call_expression
83 | function: (member_expression
84 | property: (property_identifier) @name.reference.call)
85 | arguments: (_) @reference.call)
86 | ; new-expressions reference their constructor as a class.
87 | (new_expression
88 | constructor: (_) @name.reference.class) @reference.class
89 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/dart-tags.scm:
--------------------------------------------------------------------------------
1 | (class_definition ; class definitions
2 | name: (identifier) @name.definition.class) @definition.class
3 | ; Method signatures wrapping a function_signature (this pattern captures no name).
4 | (method_signature
5 | (function_signature)) @definition.method
6 | ; Type aliases.
7 | (type_alias
8 | (type_identifier) @name.definition.type) @definition.type
9 | ; Getters.
10 | (method_signature
11 | (getter_signature
12 | name: (identifier) @name.definition.method)) @definition.method
13 | ; Setters.
14 | (method_signature
15 | (setter_signature
16 | name: (identifier) @name.definition.method)) @definition.method
17 | ; Methods declared through a function_signature, tagged by name.
18 | (method_signature
19 | (function_signature
20 | name: (identifier) @name.definition.method)) @definition.method
21 | ; Factory constructors.
22 | (method_signature
23 | (factory_constructor_signature
24 | (identifier) @name.definition.method)) @definition.method
25 | ; Constructors.
26 | (method_signature
27 | (constructor_signature
28 | name: (identifier) @name.definition.method)) @definition.method
29 | ; Operator signatures (this pattern captures no name).
30 | (method_signature
31 | (operator_signature)) @definition.method
32 | ; Catch-all: every method_signature node.
33 | (method_signature) @definition.method
34 | ; Mixins.
35 | (mixin_declaration
36 | (mixin)
37 | (identifier) @name.definition.mixin) @definition.mixin
38 | ; Extensions.
39 | (extension_declaration
40 | name: (identifier) @name.definition.extension) @definition.extension
41 | ; Enums.
42 | (enum_declaration
43 | name: (identifier) @name.definition.enum) @definition.enum
44 | ; Function signatures.
45 | (function_signature
46 | name: (identifier) @name.definition.function) @definition.function
47 | ; new-expressions reference the created type.
48 | (new_expression
49 | (type_identifier) @name.reference.class) @reference.class
50 | ; Variable initializers whose value is an identifier, optionally followed by a selector.
51 | (initialized_variable_definition
52 | name: (identifier)
53 | value: (identifier) @name.reference.class
54 | value: (selector
55 | "!"?
56 | (argument_part
57 | (arguments
58 | (argument)*))?)?) @reference.class
59 | ; Assignments to a member selected with ".".
60 | (assignment_expression
61 | left: (assignable_expression
62 | (identifier)
63 | (unconditional_assignable_selector
64 | "."
65 | (identifier) @name.reference.call))) @reference.call
66 | ; Assignments to a member selected with "?.".
67 | (assignment_expression
68 | left: (assignable_expression
69 | (identifier)
70 | (conditional_assignable_selector
71 | "?."
72 | (identifier) @name.reference.call))) @reference.call
73 | ; An identifier followed by any number of selectors and an optional cascade section.
74 | ((identifier) @name
75 | (selector
76 | "!"?
77 | (conditional_assignable_selector
78 | "?." (identifier) @name.reference.call)?
79 | (unconditional_assignable_selector
80 | "."? (identifier) @name.reference.call)?
81 | (argument_part
82 | (arguments
83 | (argument)*))?)*
84 | (cascade_section
85 | (cascade_selector
86 | (identifier)) @name.reference.call
87 | (argument_part
88 | (arguments
89 | (argument)*))?)?) @reference.call
90 |
91 |
92 |
--------------------------------------------------------------------------------
/queries/tree-sitter-language-pack/javascript-tags.scm:
--------------------------------------------------------------------------------
; Tree-sitter tags query for JavaScript.
; Captures named @name.definition.* / @definition.* mark definitions,
; @name.reference.* / @reference.* mark references, and @doc collects the
; comments that sit directly before a definition.

; Method definitions, excluding the one literally named "constructor".
; The "." anchor requires the comments to be immediately followed by the method.
(
  (comment)* @doc
  .
  (method_definition
    name: (property_identifier) @name.definition.method) @definition.method
  (#not-eq? @name.definition.method "constructor")
  (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
  (#select-adjacent! @doc @definition.method)
)

; Classes: both class expressions (class) and class declarations.
; The name is matched with (_) so any node type in the name field is accepted.
(
  (comment)* @doc
  .
  [
    (class
      name: (_) @name.definition.class)
    (class_declaration
      name: (_) @name.definition.class)
  ] @definition.class
  (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
  (#select-adjacent! @doc @definition.class)
)

; Named functions: function expressions/declarations and their generator forms.
(
  (comment)* @doc
  .
  [
    (function_expression
      name: (identifier) @name.definition.function)
    (function_declaration
      name: (identifier) @name.definition.function)
    (generator_function
      name: (identifier) @name.definition.function)
    (generator_function_declaration
      name: (identifier) @name.definition.function)
  ] @definition.function
  (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
  (#select-adjacent! @doc @definition.function)
)

; Functions bound through a lexical_declaration whose value is an arrow
; function or function expression. The definition capture is the declarator,
; not the whole declaration.
(
  (comment)* @doc
  .
  (lexical_declaration
    (variable_declarator
      name: (identifier) @name.definition.function
      value: [(arrow_function) (function_expression)]) @definition.function)
  (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
  (#select-adjacent! @doc @definition.function)
)

; Same as above, for a variable_declaration.
(
  (comment)* @doc
  .
  (variable_declaration
    (variable_declarator
      name: (identifier) @name.definition.function
      value: [(arrow_function) (function_expression)]) @definition.function)
  (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
  (#select-adjacent! @doc @definition.function)
)

; Functions assigned to an existing identifier or to a member property
; (the property name becomes the definition name). No @doc is captured here.
(assignment_expression
  left: [
    (identifier) @name.definition.function
    (member_expression
      property: (property_identifier) @name.definition.function)
  ]
  right: [(arrow_function) (function_expression)]
) @definition.function

; Object-literal pairs whose value is a function: the key is the function name.
(pair
  key: (property_identifier) @name.definition.function
  value: [(arrow_function) (function_expression)]) @definition.function

; Calls to a bare identifier, except calls to `require`.
(
  (call_expression
    function: (identifier) @name.reference.call) @reference.call
  (#not-match? @name.reference.call "^(require)$")
)

; Calls through a member expression: the property is the referenced name.
; Note that @reference.call is attached to the arguments node here, not to
; the enclosing call_expression.
(call_expression
  function: (member_expression
    property: (property_identifier) @name.reference.call)
  arguments: (_) @reference.call)

; `new X(...)`: whatever node fills the constructor field is a class reference.
(new_expression
  constructor: (_) @name.reference.class) @reference.class
89 |
--------------------------------------------------------------------------------
/REPLIT_SETUP_COMPLETE.md:
--------------------------------------------------------------------------------
1 | # ✅ Emigo Setup Complete on Replit
2 |
3 | ## Summary
4 |
5 | This Emigo project has been successfully set up on Replit. All Python dependencies are installed and validated.
6 |
7 | ## What Was Done
8 |
9 | ### 1. Python Environment
10 | - ✅ Installed Python 3.11
11 | - ✅ Installed all dependencies from requirements.txt:
12 | - litellm (LLM provider integration)
13 | - epc (Emacs-Python communication)
14 | - networkx, scipy (graph/math operations)
15 | - tiktoken (token counting)
16 | - grep-ast, tree-sitter (code parsing)
17 | - And 30+ other dependencies
18 |
19 | ### 2. Project Configuration
20 | - ✅ Created `.gitignore` with Python and Emacs patterns
21 | - ✅ Created `replit.md` with project documentation
22 | - ✅ Created `test_setup.py` validation script
23 | - ✅ Created `README_REPLIT.md` usage guide
24 |
25 | ### 3. Validation
26 | - ✅ All 11 Python dependencies import successfully
27 | - ✅ All 11 core modules pass syntax validation
28 | - ✅ Setup validation workflow runs successfully
29 |
30 | ## Current Status
31 |
32 | **The project is ready to use!**
33 |
34 | ### For Emacs Users:
35 | 1. Clone this repository to your local machine
36 | 2. Install in Emacs using straight.el (see README_REPLIT.md)
37 | 3. Configure your API keys
38 | 4. Run `M-x emigo` in your project
39 |
40 | ### For Developers on Replit:
41 | 1. Click "Run" to see validation test results
42 | 2. Modify Python backend files as needed
43 | 3. Test with: `python3 test_setup.py`
44 |
45 | ## Important Notes
46 |
47 | ⚠️ **This is an Emacs plugin, not a web application**
48 | - No web server runs on port 5000
49 | - Requires Emacs to function
50 | - Python backend communicates with Emacs via EPC
51 |
52 | ## Files Added for Replit
53 |
54 | 1. `replit.md` - Project overview and architecture
55 | 2. `test_setup.py` - Setup validation script
56 | 3. `README_REPLIT.md` - Replit-specific usage guide
57 | 4. `REPLIT_SETUP_COMPLETE.md` - This summary
58 | 5. `.gitignore` - Updated with Python/Emacs patterns
59 |
60 | ## Validation Results
61 |
62 | ```
63 | Testing Python dependencies... ✅ 11/11 OK
64 | Testing Emigo core modules... ✅ 11/11 OK
65 |
66 | ✅ SETUP VALIDATION PASSED
67 | ```
68 |
69 | All modules loaded:
70 | - epc, networkx, pygments, grep_ast, diskcache
71 | - tiktoken, tqdm, gitignore_parser, scipy
72 | - litellm, orjson
73 |
74 | All code validated:
75 | - config.py, utils.py, session.py, tools.py
76 | - tool_definitions.py, agent.py, llm.py
77 | - llm_providers.py, llm_worker.py
78 | - repomapper.py, emigo.py
79 |
80 | ## Next Steps
81 |
82 | Choose your path:
83 |
84 | **A. Use in Emacs** (recommended)
85 | - See README_REPLIT.md for installation
86 |
87 | **B. Develop on Replit**
88 | - All Python dependencies are ready
89 | - Make changes and test with validation script
90 |
91 | **C. Install Emacs on Replit** (advanced)
92 | ```bash
93 | nix-env -iA nixpkgs.emacs
94 | ```
95 |
96 | ---
97 |
98 | Setup completed: October 13, 2025
99 |
--------------------------------------------------------------------------------
/llm_providers.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Handles formatting tool definitions for specific LLM providers.
6 |
7 | Currently supports formatting for OpenAI's function/tool calling API.
8 | Can be extended to support other providers like Anthropic, Google Gemini, etc.
9 | """
10 |
11 | from typing import List, Dict, Any
12 | from tool_definitions import ToolDefinition
13 |
def format_tools_for_openai(tools: List[ToolDefinition]) -> List[Dict[str, Any]]:
    """
    Formats a list of ToolDefinition objects into the JSON structure
    expected by OpenAI's Chat Completions API (for tool_choice='auto').

    Args:
        tools: Dict-like tool definitions. Each may carry 'name',
            'description' and 'parameters'; each parameter may carry 'name',
            'type', 'description', 'required' and (for arrays) 'items'.
            ``None`` or an empty list yields an empty result.

    Returns:
        One ``{"type": "function", "function": {...}}`` entry per tool, with
        the tool's parameters expressed as a JSON-schema object.
    """
    openai_tools = []
    for tool in tools or []:
        properties = {}
        required_params = []
        # `or []` also covers a tool whose 'parameters' key exists but is None.
        for param in tool.get('parameters') or []:
            param_name = param.get('name')
            if not param_name:
                # A parameter without a name cannot be keyed in the schema.
                # Skip it instead of raising KeyError, in line with the
                # defensive .get() defaults used for every other field.
                continue

            # Basic JSON schema type mapping; default to string if missing.
            param_type = param.get('type', 'string')
            schema = {
                "type": param_type,
                "description": param.get('description', ''),
            }
            if param_type == "array":
                # JSON schema arrays need an element schema; assume strings
                # when the definition does not provide one.
                schema['items'] = param.get('items', {"type": "string"})
            properties[param_name] = schema

            if param.get('required', False):  # Default to not required
                required_params.append(param_name)

        openai_tools.append({
            "type": "function",
            "function": {
                "name": tool.get('name', 'unknown_tool'),
                "description": tool.get('description', ''),
                "parameters": {
                    "type": "object",
                    "properties": properties,
                    "required": required_params,
                },
            },
        })
    return openai_tools
49 |
50 | # --- Add formatters for other providers as needed ---
51 | # def format_tools_for_anthropic(tools: List[ToolDefinition]) -> List[Dict[str, Any]]:
52 | # # Implementation for Anthropic's tool format
53 | # pass
54 |
55 | # def format_tools_for_google(tools: List[ToolDefinition]) -> List[Dict[str, Any]]:
56 | # # Implementation for Google Gemini's tool format
57 | # pass
58 |
59 | # --- Provider Selection Logic (Example) ---
60 | # You might have logic elsewhere to choose the correct formatter based on the LLM model name
def get_formatted_tools(tools: List[ToolDefinition], model_name: str) -> List[Dict[str, Any]]:
    """Selects the appropriate formatter based on the model name.

    Only the OpenAI formatter exists at the moment, so every model name
    (including ``None`` or an empty string) resolves to it.

    Args:
        tools: Tool definitions to format.
        model_name: Name of the target LLM model. Currently unused, but kept
            in the signature so provider dispatch can be added without
            touching callers.

    Returns:
        The tools formatted for OpenAI-style tool calling.
    """
    # Provider dispatch goes here once dedicated formatters are implemented,
    # for example:
    #   normalized = (model_name or "").lower()
    #   if "claude" in normalized: return format_tools_for_anthropic(tools)
    #   if "gemini" in normalized: return format_tools_for_google(tools)
    # Until then every provider falls back to the OpenAI format.
    return format_tools_for_openai(tools)
76 |
--------------------------------------------------------------------------------
/queries/tree-sitter-languages/ocaml-tags.scm:
--------------------------------------------------------------------------------
; Tree-sitter tags query for OCaml.
; In the definition patterns below, an optional comment directly before the
; definition (note the "." anchor) is captured as @doc.

; Modules
;--------

; Module definitions: the module name inside the binding is the tag name.
(
  (comment)? @doc .
  (module_definition (module_binding (module_name) @name.definition.module) @definition.module)
  (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
)

; Any module name appearing in a module path is a module reference.
(module_path (module_name) @name.reference.module) @reference.module

; Module types
;--------------

; Module type definitions are tagged as interfaces.
(
  (comment)? @doc .
  (module_type_definition (module_type_name) @name.definition.interface) @definition.interface
  (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
)

; Uses of a module type are tagged as implementation references.
(module_type_path (module_type_name) @name.reference.implementation) @reference.implementation

; Functions
;----------

; A let binding counts as a function when it either has at least one
; parameter or its body is a fun/function expression.
(
  (comment)? @doc .
  (value_definition
    [
      (let_binding
        pattern: (value_name) @name.definition.function
        (parameter))
      (let_binding
        pattern: (value_name) @name.definition.function
        body: [(fun_expression) (function_expression)])
    ] @definition.function
  )
  (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
)

; `external` declarations are tagged as function definitions as well.
(
  (comment)? @doc .
  (external (value_name) @name.definition.function) @definition.function
  (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
)

; Ordinary application: the applied value is a call reference.
(application_expression
  function: (value_path (value_name) @name.reference.call)) @reference.call

; `f @@ x`: the left operand is the called function. @reference.call is bound
; to the operator node so the #eq? predicate can test its text.
(infix_expression
  left: (value_path (value_name) @name.reference.call)
  operator: (concat_operator) @reference.call
  (#eq? @reference.call "@@"))

; `x |> f`: the right operand is the called function; same predicate technique.
(infix_expression
  operator: (rel_operator) @reference.call
  right: (value_path (value_name) @name.reference.call)
  (#eq? @reference.call "|>"))

; Operator
;---------

; Operator definitions such as `let ( + ) a b = ...`: the operator inside the
; parentheses is the function name.
(
  (comment)? @doc .
  (value_definition
    (let_binding
      pattern: (parenthesized_operator (_) @name.definition.function)) @definition.function)
  (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
)

; Every operator occurrence is a call reference; the operator node itself
; serves as both the name and the reference span.
[
  (prefix_operator)
  (sign_operator)
  (pow_operator)
  (mult_operator)
  (add_operator)
  (concat_operator)
  (rel_operator)
  (and_operator)
  (or_operator)
  (assign_operator)
  (hash_operator)
  (indexing_operator)
  (let_operator)
  (let_and_operator)
  (match_operator)
] @name.reference.call @reference.call

; Classes
;--------

; Class definitions and class type definitions are both tagged as classes.
(
  (comment)? @doc .
  [
    (class_definition (class_binding (class_name) @name.definition.class) @definition.class)
    (class_type_definition (class_type_binding (class_type_name) @name.definition.class) @definition.class)
  ]
  (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
)

; Class and class type paths are class references.
[
  (class_path (class_name) @name.reference.class)
  (class_type_path (class_type_name) @name.reference.class)
] @reference.class

; Methods
;--------

; Method definitions inside objects/classes.
(
  (comment)? @doc .
  (method_definition (method_name) @name.definition.method) @definition.method
  (#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
)

; Method invocations are tagged as call references.
(method_invocation (method_name) @name.reference.call) @reference.call
116 |
--------------------------------------------------------------------------------
/replit.md:
--------------------------------------------------------------------------------
1 | # Emigo - AI Assistant for Emacs
2 |
3 | ## Project Overview
4 |
5 | **Emigo** is an intelligent, agentic Emacs-native AI assistant that understands and interacts with your codebase. It's designed to run as a Python backend that communicates with Emacs via EPC (Emacs Process Communication).
6 |
7 | **Key Features:**
8 | - Agentic tool use - interacts with your environment based on LLM reasoning
9 | - Native Emacs integration
10 | - Flexible LLM support via LiteLLM (OpenRouter, Deepseek, etc.)
11 | - Context-aware interactions with chat history and project context
12 |
13 | ## Project Type
14 |
15 | This is **NOT a web application** - it's an Emacs plugin with a Python backend. The Python code (`emigo.py`) runs as an EPC server that Emacs connects to for AI-powered coding assistance.
16 |
17 | ## Architecture
18 |
19 | ### Main Components:
20 | 1. **emigo.el** - Emacs Lisp frontend
21 | 2. **emigo.py** - Python EPC orchestrator (main backend)
22 | 3. **llm_worker.py** - LLM interaction subprocess
23 | 4. **session.py** - Session management
24 | 5. **tools.py** - Tool implementations (read_file, write_to_file, execute_command, etc.)
25 | 6. **repomapper.py** - Repository mapping for context
26 |
27 | ### How It Works:
28 | 1. Emacs starts the Python backend (`emigo.py`) as a subprocess
29 | 2. Python starts an EPC server on a random port
30 | 3. Emacs connects to the EPC server
31 | 4. User interacts via Emacs buffers
32 | 5. LLM worker handles AI interactions and tool execution
33 |
34 | ## Replit Setup Status
35 |
36 | ✅ **Python 3.11 installed**
37 | ✅ **All Python dependencies installed** (litellm, epc, networkx, etc.)
38 | ✅ **Project is ready to use**
39 |
40 | ## Usage in Emacs
41 |
42 | This project is designed to be used from within Emacs:
43 |
44 | 1. **Install in Emacs** using straight.el:
45 | ```emacs-lisp
46 | (use-package emigo
47 | :straight (:host github :repo "MatthewZMD/emigo" :files (:defaults "*.py" "*.el"))
48 | :config
49 | (emigo-enable)
50 | :custom
51 | (emigo-model "openrouter/deepseek/deepseek-chat-v3-0324")
52 | (emigo-base-url "https://openrouter.ai/api/v1")
53 | (emigo-api-key (getenv "OPENROUTER_API_KEY")))
54 | ```
55 |
56 | 2. **Basic Usage:**
57 | - `M-x emigo` - Start Emigo in your project
58 | - Type prompts and press `C-c C-c` to send
59 | - Use `@filename` to add files to context
60 | - `C-c f` - Add files interactively
61 | - `C-c l` - List files in context
62 | - `C-c H` - Clear chat history
63 |
64 | ## Testing on Replit
65 |
66 | Since this is an Emacs plugin and Replit doesn't have Emacs pre-installed, you can:
67 |
68 | 1. **Validate Python Setup:**
69 | ```bash
70 | python3 --version # Check Python
71 | pip list | grep -E "(litellm|epc)" # Verify dependencies
72 | ```
73 |
74 | 2. **Check Code Structure:**
75 | ```bash
76 | python3 -m py_compile emigo.py # Syntax check
77 | ```
78 |
79 | 3. **Install Emacs (optional):**
80 | If you want to actually run Emigo in Replit, you would need to install Emacs first.
81 |
82 | ## Development Notes
83 |
84 | - Written in Python 3.x (tested with 3.11)
85 | - Uses EPC for Emacs-Python communication
86 | - LLM interactions via LiteLLM (supports multiple providers)
87 | - Active development - expect breaking changes
88 |
89 | ## Recent Setup
90 |
91 | - 2025-10-13: Imported to Replit, installed Python 3.11 and all dependencies
92 |
--------------------------------------------------------------------------------
/test_setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Test script to validate Emigo setup on Replit.
4 | Since Emigo is an Emacs plugin, this script validates that:
5 | 1. All Python dependencies are installed
6 | 2. Core modules can be imported
7 | 3. Configuration is valid
8 | """
9 |
10 | import sys
11 | import importlib
12 |
def test_imports():
    """Test that all required modules can be imported.

    Tries to import every third-party dependency and prints one status line
    per module, followed by a summary.

    Returns:
        bool: True when every module imported, False when at least one failed.
    """
    required_modules = [
        'epc',
        'networkx',
        'pygments',
        'grep_ast',
        'diskcache',
        'tiktoken',
        'tqdm',
        'gitignore_parser',
        'scipy',
        'litellm',
        'orjson',
    ]

    print("Testing Python dependencies...")
    print("-" * 50)

    failed = []
    for module in required_modules:
        try:
            importlib.import_module(module)
        except Exception as e:
            # Deliberately broad: a broken installation can fail at import
            # time with errors other than ImportError, and one bad package
            # must not abort the report for the remaining ones. This mirrors
            # the handling in test_core_modules.
            print(f"✗ {module:20s} - FAILED: {e}")
            failed.append(module)
        else:
            print(f"✓ {module:20s} - OK")

    print("-" * 50)
    if failed:
        print(f"\n❌ {len(failed)} module(s) failed to import: {', '.join(failed)}")
        return False

    print("\n✅ All dependencies imported successfully!")
    return True
48 |
def test_core_modules(base_dir=None):
    """Test that Emigo core modules are valid.

    Each module's source is compiled but never imported or executed, so the
    check has no side effects such as opening an EPC connection.

    Args:
        base_dir: Directory containing the core ``*.py`` files. Defaults to
            the directory of this script, so the check works regardless of
            the current working directory.

    Returns:
        bool: True when every module compiles, False when any module is
        missing, unreadable or has a syntax error.
    """
    import os  # local import: the file's top-level imports are only sys/importlib

    if base_dir is None:
        base_dir = os.path.dirname(os.path.abspath(__file__))

    print("\nTesting Emigo core modules...")
    print("-" * 50)

    core_modules = [
        'config',
        'utils',
        'session',
        'tools',
        'tool_definitions',
        'agent',
        'llm',
        'llm_providers',
        'llm_worker',
        'repomapper',
        'emigo',
    ]

    failed = []
    for module in core_modules:
        try:
            # Just compile, don't import (to avoid EPC connection issues).
            # Read as UTF-8 explicitly: the sources contain non-ASCII text and
            # the locale's default encoding may not be able to decode it.
            with open(os.path.join(base_dir, f"{module}.py"), 'r', encoding='utf-8') as f:
                compile(f.read(), f"{module}.py", 'exec')
            print(f"✓ {module}.py - Syntax OK")
        except Exception as e:
            print(f"✗ {module}.py - FAILED: {e}")
            failed.append(module)

    print("-" * 50)
    if failed:
        print(f"\n❌ {len(failed)} module(s) have issues: {', '.join(failed)}")
        return False

    print("\n✅ All core modules are syntactically valid!")
    return True
86 |
def main():
    """Run all setup validation checks and print a summary.

    Both checks always run, even if the first one fails, so the report is
    complete.

    Returns:
        int: Process exit code, 0 when dependencies and core modules both
        validate, 1 otherwise.
    """
    separator = "=" * 50

    print(separator)
    print("Emigo Setup Validation Test")
    print(separator)
    print()

    # Test dependencies
    deps_ok = test_imports()

    # Test core modules
    core_ok = test_core_modules()

    print("\n" + separator)
    if deps_ok and core_ok:
        print("✅ SETUP VALIDATION PASSED")
        print("\nEmigo is ready to use from Emacs!")
        print("\nNote: This is an Emacs plugin. To use it:")
        print("1. Install in Emacs using straight.el")
        print("2. Configure API keys (OPENROUTER_API_KEY, etc.)")
        print("3. Run M-x emigo in your project")
        exit_code = 0
    else:
        print("❌ SETUP VALIDATION FAILED")
        print("\nSome components need attention.")
        exit_code = 1

    # Print the closing separator before returning; it was previously placed
    # after both return statements and therefore never ran.
    print(separator)
    return exit_code
113 |
114 | if __name__ == "__main__":
115 | sys.exit(main())
116 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # Emigo: Future of Agentic Development in Emacs
6 |
7 | Emigo intends to be an intelligent, agentic Emacs-native AI assistant that understands and interacts with your codebase. Brought to you by the creators of [Emacs Application Framework](https://github.com/emacs-eaf/emacs-application-framework) and [lsp-bridge](https://github.com/manateelazycat/lsp-bridge), built on the shoulders of giants like [Cursor](https://www.cursor.com/en), [Aider](https://github.com/paul-gauthier/aider) and [Cline](https://github.com/sturdy-dev/cline), Emigo is the spiritual successor to [Aidermacs](https://github.com/MatthewZMD/aidermacs), reimagined from the ground up for Emacs.
8 |
9 | ## ⚠️ Emigo is under *very* active development, experimentation, and rapid-prototyping ⚠️
10 |
11 | The project is in its early stages. Expect frequent breaking changes and unstable features. Please use it only for testing: try it out and report issues - your feedback helps shape Emigo!
12 |
13 | ## Key Features
14 |
15 | * **Agentic Tool Use:** Emigo doesn't just generate text; it uses tools to interact with your environment based on the LLM's reasoning.
16 | * **Emacs Integration:** Designed to feel native within Emacs, leveraging familiar interfaces and workflows.
17 | * **Flexible LLM Support:** Connects to various LLM providers through [LiteLLM](https://github.com/BerriAI/litellm), allowing you to choose the model that best suits your needs.
18 | * **Context-Aware Interactions:** Manages chat history and project context for coherent sessions.
19 |
20 | ## Installation
21 |
22 | 1. **Prerequisites:**
23 | * Emacs 28 or higher.
24 | * Python 3.x.
25 | 2. **Install Python Dependencies:**
26 | ```bash
27 | pip install -r requirements.txt
28 | ```
29 | 3. **Install with straight.el:** Add to your Emacs config:
30 |
31 | ```emacs-lisp
32 | (use-package emigo
33 | :straight (:host github :repo "MatthewZMD/emigo" :files (:defaults "*.py" "*.el"))
34 | :config
35 | (emigo-enable) ;; Starts the background process automatically
36 | :custom
37 | ;; Encourage using OpenRouter with Deepseek
38 | (emigo-model "openrouter/deepseek/deepseek-chat-v3-0324")
39 | (emigo-base-url "https://openrouter.ai/api/v1")
40 | (emigo-api-key (getenv "OPENROUTER_API_KEY")))
41 | ```
42 |
43 | ## Usage
44 |
45 | ### Basic Interaction
46 | 1. **Start Emigo:** Navigate to your project directory (or any directory you want to work in) and run `M-x emigo`.
47 | 2. **Interact:** Emigo will open a dedicated buffer. The AI will respond, potentially using tools. You might be asked for approval for certain actions (like running commands or writing files).
48 | 3. **Send Prompts:** Type your prompt and press `C-c C-c` or `C-m` to send it to Emigo.
49 |
50 | ### Context Management
51 | - **Add Files:**
52 | - Mention files in your prompt using `@` (e.g., `Refactor @src/utils.py`)
53 | - Or use `C-c f` to interactively add files
54 | - **List Files in Context:** `C-c l`
55 | - **Remove Files from Context:** `C-c j`
56 | - **Clear Chat History:** `C-c H`
57 | - **View History:** `C-c h` (shows in Org mode buffer)
58 |
59 | ### Keybindings
60 | #### Core Commands
61 | - `C-c C-c` / `C-m` - Send current prompt
62 | - `C-c r` - Restart Emigo process
63 | - `C-c k` - Cancel current interaction
64 |
65 | Note: Emigo manages sessions based on the directory where you invoke `M-x emigo`. If invoked within a Git repository, the repository root is typically used as the session path. Use `C-u M-x emigo` to force the session path to be the current `default-directory`.
66 |
67 | ## Understanding Tool Use
68 |
69 | The core of Emigo's power lies in its agentic tool use. Instead of just providing code suggestions, the LLM analyzes your request and decides which actions (tools) are necessary to accomplish the task.
70 |
71 | 1. **LLM Reasoning:** Based on your prompt and the current context, the LLM determines the next step.
72 | 2. **Tool Selection:** It chooses an appropriate tool, such as `read_file`, `write_to_file`, `replace_in_file`, `execute_command`, `list_files`, `list_repomap`, or `ask_followup_question`.
73 | 3. **Tool Execution:** Emigo executes the chosen tool, potentially asking for your approval for sensitive operations.
74 | 4. **Result Feedback:** The result of the tool execution (e.g., file content, command output, error message) is fed back into the conversation history.
75 | 5. **Iteration:** The LLM uses this new information to decide the next step, continuing the cycle until the task is complete or requires further input.
76 |
77 | This iterative process allows Emigo to tackle more complex tasks that involve multiple steps and interactions with your project files and system. The LLM uses an XML format to specify the tool and its parameters.
78 |
--------------------------------------------------------------------------------
/README_REPLIT.md:
--------------------------------------------------------------------------------
1 | # Emigo on Replit - Setup Guide
2 |
3 | ## What is Emigo?
4 |
5 | **Emigo** is an AI coding assistant for Emacs - it's an intelligent, agentic plugin that understands and interacts with your codebase directly within Emacs.
6 |
7 | ⚠️ **Important:** This is **NOT a web application**. It's an Emacs editor plugin with a Python backend.
8 |
9 | ## What's Set Up on Replit
10 |
11 | ✅ **Python 3.11** - Installed and configured
12 | ✅ **All Dependencies** - litellm, epc, networkx, pygments, and all other requirements
13 | ✅ **Code Validation** - All modules pass syntax checks
14 | ✅ **Test Workflow** - Validation test runs successfully
15 |
16 | ## How to Use This Project
17 |
18 | ### Option 1: Use in Your Own Emacs Setup
19 |
20 | 1. **Clone this repo to your local machine:**
21 | ```bash
22 | git clone https://github.com/MatthewZMD/emigo.git
23 | cd emigo
24 | ```
25 |
26 | 2. **Ensure Python dependencies are installed:**
27 | ```bash
28 | pip install -r requirements.txt
29 | ```
30 |
31 | 3. **Install in Emacs** using straight.el:
32 | ```emacs-lisp
33 | (use-package emigo
34 | :straight (:host github :repo "MatthewZMD/emigo" :files (:defaults "*.py" "*.el"))
35 | :config
36 | (emigo-enable)
37 | :custom
38 | (emigo-model "openrouter/deepseek/deepseek-chat-v3-0324")
39 | (emigo-base-url "https://openrouter.ai/api/v1")
40 | (emigo-api-key (getenv "OPENROUTER_API_KEY")))
41 | ```
42 |
43 | 4. **Set up your API key** (e.g., for OpenRouter):
44 | ```bash
45 | export OPENROUTER_API_KEY="your-api-key-here"
46 | ```
47 |
48 | 5. **Use in Emacs:**
49 | - Open your project in Emacs
50 | - Run `M-x emigo`
51 | - Start coding with AI assistance!
52 |
53 | ### Option 2: Test/Develop on Replit
54 |
55 | Since Emacs isn't pre-installed on Replit, you can:
56 |
57 | 1. **Validate the setup** (already configured):
58 | - Click the "Run" button to see the validation test
59 | - All dependencies and modules should show ✓ OK
60 |
61 | 2. **Install Emacs on Replit** (optional, for testing):
62 | ```bash
63 | # Install Emacs
64 | nix-env -iA nixpkgs.emacs
65 |
66 | # Verify installation
67 | emacs --version
68 | ```
69 |
70 | 3. **Develop/modify the Python backend:**
71 | - All Python files are ready for development
72 | - Core modules: `emigo.py`, `llm_worker.py`, `session.py`, `tools.py`
73 | - Test your changes with: `python3 test_setup.py`
74 |
75 | ## Key Features
76 |
77 | - 🤖 **Agentic Tool Use** - LLM can execute tools (read/write files, run commands)
78 | - 🔌 **Flexible LLM Support** - Works with OpenRouter, Deepseek, OpenAI, and more
79 | - 📝 **Context-Aware** - Maintains chat history and project context
80 | - 🎯 **Repository Mapping** - Understands your codebase structure
81 |
82 | ## Emacs Commands (when running in Emacs)
83 |
84 | - `M-x emigo` - Start Emigo
85 | - `C-c C-c` or `C-m` - Send prompt
86 | - `C-c f` - Add files to context
87 | - `C-c l` - List files in context
88 | - `C-c j` - Remove files from context
89 | - `C-c H` - Clear chat history
90 | - `C-c r` - Restart Emigo process
91 |
92 | ## Project Structure
93 |
94 | ```
95 | emigo/
96 | ├── emigo.py # Main Python backend (EPC server)
97 | ├── llm_worker.py # LLM interaction subprocess
98 | ├── emigo.el # Emacs Lisp frontend
99 | ├── emigo-epc.el # EPC client for Emacs
100 | ├── session.py # Session management
101 | ├── tools.py # Tool implementations
102 | ├── repomapper.py # Repository mapping
103 | ├── requirements.txt # Python dependencies
104 | └── test_setup.py # Validation script
105 | ```
106 |
107 | ## Architecture
108 |
109 | 1. **Emacs** (frontend) ↔️ **EPC** ↔️ **emigo.py** (backend)
110 | 2. **emigo.py** manages **llm_worker.py** subprocess
111 | 3. **llm_worker.py** handles LLM API calls via LiteLLM
112 | 4. Tools execute in **tools.py** and report back to Emacs
113 |
114 | ## For Developers
115 |
116 | If you're contributing to Emigo:
117 |
118 | 1. **Run validation:** `python3 test_setup.py`
119 | 2. **Syntax check:** `python3 -m py_compile emigo.py` (repeat for any module you changed)
120 | 3. **Check imports:** `python3 -c "import config"` (substitute the module you changed)
121 |
122 | ## Resources
123 |
124 | - **Original Repository:** [MatthewZMD/emigo](https://github.com/MatthewZMD/emigo)
125 | - **Documentation:** See main [README.md](README.md)
126 | - **Emacs Config:** See [emigo.el](emigo.el) for Elisp side
127 |
128 | ## Getting Help
129 |
130 | This project is in active development. Check the [issues page](https://github.com/MatthewZMD/emigo/issues) for known problems and feature requests.
131 |
132 | ---
133 |
134 | **Note:** Emigo requires Emacs to run. This Replit setup validates that the Python backend is correctly configured and ready to use.
135 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Central Configuration for Emigo.
6 |
7 | This module stores shared constants and configuration settings used across
8 | different parts of the Emigo Python backend. This includes lists for
9 | ignoring directories and file extensions during repository scanning, and
10 | a list defining files considered "important" at the root of a project for
11 | prioritization in the repository map.
12 |
13 | Centralizing these configurations makes them easier to manage and modify.
14 | """
15 |
16 | import os
17 |
18 |
19 | # --- Tool Result/Error Messages ---
20 |
21 | TOOL_RESULT_SUCCESS = "Tool executed successfully."
22 | TOOL_RESULT_OUTPUT_PREFIX = "Tool output:\n"
23 | TOOL_DENIED = "The user denied this operation."
24 | TOOL_ERROR_PREFIX = "[Tool Error] "
25 | TOOL_ERROR_SUFFIX = ""
26 |
27 |
28 | # --- Ignored Directories ---
29 | # Used in agent.py (_get_environment_details) and repomapper.py (_find_src_files)
30 | # Combine common ignored directories from both places.
31 | IGNORED_DIRS = [
32 | r'^\.emigo_repomap$',
33 | r'^\.aider.*$',
34 | r'^\.(git|hg|svn)$', # Version control
35 | r'^__pycache__$', # Python cache
36 | r'^node_modules$', # Node.js dependencies
37 | r'^(\.venv|venv|\.env|env)$', # Virtual environments
38 | r'^(build|dist)$', # Build artifacts
39 | r'^vendor$' # Vendor dependencies (common in some languages)
40 | ]
41 |
42 | # --- Ignored File Extensions (Binary/Non-Source) ---
43 | # Used in repomapper.py (_find_src_files)
44 | BINARY_EXTS = {
45 | # Images
46 | '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.ico', '.svg',
47 | # Media
48 | '.mp3', '.mp4', '.mov', '.avi', '.mkv', '.wav',
49 | # Archives
50 | '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar',
51 | # Documents
52 | '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
53 | # Other binaries
54 | '.exe', '.dll', '.so', '.o', '.a', '.class', '.jar',
55 | # Logs/Temp
56 | '.log', '.tmp', '.swp'
57 | }
58 |
59 | # --- Important Files (Root Level) ---
60 | # Used in repomapper.py (is_important)
61 | # List of filenames/paths considered important at the root of a project.
# Filenames (or root-relative paths) considered important when found at the
# root of a project. Every entry appears exactly once; the duplicate
# "codecov.yml" that used to sit under "Quality and metrics" was removed.
ROOT_IMPORTANT_FILES_LIST = [
    # Version Control
    ".gitignore", ".gitattributes",
    # Documentation (project-level text files)
    "README", "README.md", "README.txt", "README.rst",
    "CONTRIBUTING", "CONTRIBUTING.md", "CONTRIBUTING.txt", "CONTRIBUTING.rst",
    "LICENSE", "LICENSE.md", "LICENSE.txt",
    "CHANGELOG", "CHANGELOG.md", "CHANGELOG.txt", "CHANGELOG.rst",
    "SECURITY", "SECURITY.md", "SECURITY.txt",
    "CODEOWNERS",
    # Package Management and Dependencies
    "requirements.txt", "Pipfile", "Pipfile.lock", "pyproject.toml",
    "setup.py", "setup.cfg",
    "package.json", "package-lock.json", "yarn.lock", "npm-shrinkwrap.json",
    "Gemfile", "Gemfile.lock",
    "composer.json", "composer.lock",
    "pom.xml", "build.gradle", "build.gradle.kts", "build.sbt",
    "go.mod", "go.sum",
    "Cargo.toml", "Cargo.lock",
    "mix.exs", "rebar.config", "project.clj",
    "Podfile", "Cartfile",
    "dub.json", "dub.sdl",
    # Configuration and Settings
    ".env", ".env.example", ".editorconfig",
    "tsconfig.json", "jsconfig.json",
    ".babelrc", "babel.config.js",
    ".eslintrc", ".eslintignore", ".prettierrc", ".stylelintrc", "tslint.json",
    ".pylintrc", ".flake8", ".rubocop.yml", ".scalafmt.conf",
    ".dockerignore", ".gitpod.yml",
    "sonar-project.properties", "renovate.json", "dependabot.yml",
    ".pre-commit-config.yaml", "mypy.ini", "tox.ini", ".yamllint",
    "pyrightconfig.json",
    # Build and Compilation
    "webpack.config.js", "rollup.config.js", "parcel.config.js",
    "gulpfile.js", "Gruntfile.js",
    "build.xml", "build.boot", "project.json", "build.cake", "MANIFEST.in",
    # Testing
    "pytest.ini", "phpunit.xml", "karma.conf.js", "jest.config.js",
    "cypress.json", ".nycrc", ".nycrc.json",
    # CI/CD
    ".travis.yml", ".gitlab-ci.yml", "Jenkinsfile",
    "azure-pipelines.yml", "bitbucket-pipelines.yml", "appveyor.yml",
    "circle.yml", ".circleci/config.yml", ".github/dependabot.yml",
    "codecov.yml", ".coveragerc",
    # Docker and Containers
    "Dockerfile", "docker-compose.yml", "docker-compose.override.yml",
    # Cloud and Serverless
    "serverless.yml", "firebase.json", "now.json", "netlify.toml",
    "vercel.json", "app.yaml",
    "terraform.tf", "main.tf",
    "cloudformation.yaml", "cloudformation.json",
    "ansible.cfg", "kubernetes.yaml", "k8s.yaml",
    # Database
    "schema.sql", "liquibase.properties", "flyway.conf",
    # Framework-specific
    "next.config.js", "nuxt.config.js", "vue.config.js",
    "angular.json", "gatsby-config.js", "gridsome.config.js",
    # API Documentation
    "swagger.yaml", "swagger.json", "openapi.yaml", "openapi.json",
    # Development environment
    ".nvmrc", ".ruby-version", ".python-version", "Vagrantfile",
    # Quality and metrics
    ".codeclimate.yml",
    # Documentation tooling (site generators / hosted docs)
    "mkdocs.yml", "_config.yml", "book.toml",
    "readthedocs.yml", ".readthedocs.yaml",
    # Package registries
    ".npmrc", ".yarnrc",
    # Linting and formatting
    ".isort.cfg", ".markdownlint.json", ".markdownlint.yaml",
    # Security
    ".bandit", ".secrets.baseline",
    # Misc
    ".pypirc", ".gitkeep", ".npmignore",
]
237 |
# Normalize each entry with os.path.normpath once, up front, and keep the
# results in a set for efficient lookup.
NORMALIZED_ROOT_IMPORTANT_FILES = {
    os.path.normpath(entry) for entry in ROOT_IMPORTANT_FILES_LIST
}
240 |
--------------------------------------------------------------------------------
/agent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Defines the Agent class responsible for the core logic of interacting with the LLM.
6 |
7 | This module contains the `Agent` class which encapsulates the agentic loop behavior.
8 | It constructs prompts, processes LLM responses, and determines when to use tools.
9 | """
10 |
11 | import json # Keep for parsing LLM responses if needed
12 | import os
13 | import sys
14 | from typing import List, Dict, Optional
15 |
16 | from llm import LLMClient
17 | from repomapper import RepoMapper # Keep for agent's internal use if needed (e.g., environment details)
18 | # Import tool definitions and provider formatting
19 | from tool_definitions import get_all_tools
20 | from llm_providers import get_formatted_tools
21 | # Import only the base system prompt template
22 | from system_prompt import MAIN_SYSTEM_PROMPT
23 | import tiktoken # For token counting
24 |
25 | from utils import (
26 | get_os_name,
27 | eval_in_emacs
28 | )
29 |
class Agent:
    """
    Manages the agentic interaction loop for a given session.

    This class is instantiated by `llm_worker.py` for each interaction. It takes the
    current session state (prompt, history, context) and orchestrates the
    conversation with the LLM.

    Key Responsibilities:
    - Building the system prompt, incorporating dynamic information like the
      current working directory, OS details and the formatted tool list.
    - Preparing the full message list for the LLM, including the system prompt,
      truncated history, and environment details (provided by the worker).
    - Managing history truncation logic to stay within token limits.
    - Streaming the LLM response back to Emacs.

    Note: History itself is managed by the `Session` object in the main `emigo.py`
    process and passed to the worker for each interaction. Tool implementations
    reside in `tools.py` and are executed via the main `emigo.py` process.
    Tool-use parsing is no longer done here; it happens in `llm_worker.py`.
    """

    def __init__(self, session_path: str, llm_client: LLMClient, chat_files_ref: Dict[str, List[str]], verbose: bool = False):
        """Store session state and set up the tokenizer used for history budgeting.

        Args:
            session_path: Root directory for the session.
            llm_client: Client used to send messages to the LLM.
            chat_files_ref: Reference to Emigo's chat_files dict.
            verbose: When True, extra diagnostics are printed to stderr.
        """
        self.session_path = session_path  # This is the root directory for the session
        self.llm_client = llm_client
        self.chat_files_ref = chat_files_ref  # Reference to Emigo's chat_files dict
        self.environment_details_str = ""  # Initialize, will be updated by worker loop
        self.verbose = verbose
        # Keep RepoMapper instance, but usage is restricted
        self.repo_mapper = RepoMapper(root_dir=self.session_path, verbose=self.verbose)
        # History truncation settings
        self.max_history_tokens = 8000  # Target max tokens for history
        self.min_history_messages = 3  # Always keep at least this many messages
        # Tokenizer for history management
        try:
            self.tokenizer = tiktoken.get_encoding("cl100k_base")
            # Test the tokenizer works
            test_tokens = self.tokenizer.encode("test")
            if not test_tokens:
                raise ValueError("Tokenizer returned empty tokens")
        except Exception as e:
            print(f"Warning: Could not initialize tokenizer. Using simple character count fallback. Error: {e}", file=sys.stderr)
            self.tokenizer = None

    # --- Prompt Building ---

    def _build_system_prompt(self) -> str:
        """Builds the system prompt, inserting dynamic info and formatted tool list."""
        session_dir = self.session_path
        os_name = get_os_name()
        shell = "/bin/bash"  # Default shell - TODO: Get from Emacs?
        homedir = os.path.expanduser("~")

        # Get all tool definitions
        available_tools = get_all_tools()
        # Format tools for the specific LLM provider (e.g., OpenAI)
        # Assumes llm_client has model_name attribute
        formatted_tools = get_formatted_tools(available_tools, self.llm_client.model_name)
        # Convert the formatted list to a JSON string for insertion
        tools_json_string = json.dumps(formatted_tools, indent=2)

        # Use .format() on the MAIN_SYSTEM_PROMPT template
        prompt = MAIN_SYSTEM_PROMPT.format(
            session_dir=session_dir.replace(os.sep, '/'),  # Ensure POSIX paths
            os_name=os_name,
            shell=shell,
            homedir=homedir.replace(os.sep, '/'),
            tools_json=tools_json_string  # Insert the formatted tool definitions
        )
        return prompt

    # --- LLM Prompt Preparation & History Management ---
    # _parse_tool_use (XML parser) is removed. Parsing now happens in llm_worker.py

    def _prepare_llm_prompt(self, system_prompt: str, current_interaction_history: List[Dict]) -> List[Dict]:
        """Prepare the list of messages for the LLM.

        The result is the system prompt followed by the truncated history, with
        ``self.environment_details_str`` appended to the content of the final
        message. The final message is copied first, so the caller's history
        objects are never modified.

        Args:
            system_prompt: Text of the system message.
            current_interaction_history: History as a list of message dicts.

        Returns:
            A new list of message dicts ready to send.
        """
        # Always include system prompt
        messages_to_send = [{"role": "system", "content": system_prompt}]

        # --- History Truncation: Keep messages within token limit ---
        messages_to_send.extend(self._truncate_history(current_interaction_history))

        # --- Append Environment Details (Stored in self.environment_details_str) ---
        # Work on a copy so the history object is not modified.
        last_message_copy = messages_to_send[-1].copy()
        details_suffix = f"\n\n{self.environment_details_str}"
        content = last_message_copy.get("content")
        if content is None:
            # A message without text content would make ``+=`` raise TypeError.
            last_message_copy["content"] = details_suffix
        elif isinstance(content, str):
            last_message_copy["content"] = content + details_suffix
        elif isinstance(content, list):
            # ``list += str`` would extend the *shared* list character by character.
            # NOTE(review): assumes OpenAI-style content parts for list content — confirm.
            last_message_copy["content"] = list(content) + [{"type": "text", "text": details_suffix}]
        else:
            last_message_copy["content"] = f"{content}{details_suffix}"
        messages_to_send[-1] = last_message_copy  # Replace the last message

        return messages_to_send

    def _call_llm_and_stream_response(self, messages_to_send: List[Dict]) -> Optional[str]:
        """Call the LLM, stream each chunk to Emacs, and return the full response text.

        Returns:
            The concatenated response, or None if any exception occurred (the
            error is printed to stderr and flushed to the Emacs buffer).
        """
        full_response = ""
        eval_in_emacs("emigo--flush-buffer", self.session_path, "\nAssistant:\n", "llm")  # Signal start
        try:
            # Send the temporary list with context included
            response_stream = self.llm_client.send(messages_to_send, stream=True)
            for chunk in response_stream:
                # Ensure chunk is a string, default to empty string if None
                content_to_flush = chunk or ""
                eval_in_emacs("emigo--flush-buffer", self.session_path, content_to_flush, "llm")
                if chunk:  # Only append non-empty chunks to full_response
                    full_response += chunk
            return full_response
        except Exception as e:
            error_message = f"[Error during LLM communication: {e}]"
            print(f"\n{error_message}", file=sys.stderr)
            eval_in_emacs("emigo--flush-buffer", self.session_path, error_message, "error")
            # Adding the error to persistent history is handled by the caller.
            return None  # Indicate error

    # --- History Truncation & Token Counting ---

    def _truncate_history(self, history: List[Dict[str, str]]) -> List[Dict[str, str]]:
        """Truncate history to fit within token limits while preserving important messages.

        The first message is always kept. Remaining messages are taken from
        newest to oldest until adding one would exceed ``max_history_tokens``;
        the budget is overridden (with a single warning) while fewer than
        ``min_history_messages`` messages have been kept. Chronological order
        is preserved in the result.
        """
        if not history:
            return []

        # Always keep first user message for context
        first_message = history[0]
        current_tokens = self._count_tokens(first_message.get("content"))

        # Collected newest-first; reversed once at the end instead of repeated insert(1, ...).
        kept_newest_first: List[Dict[str, str]] = []
        warned = False
        for msg in reversed(history[1:]):
            msg_tokens = self._count_tokens(msg.get("content"))
            if current_tokens + msg_tokens > self.max_history_tokens:
                if 1 + len(kept_newest_first) >= self.min_history_messages:
                    break
                # Below the minimum message count: keep going, but warn only once.
                if not warned:
                    print("Warning: History exceeds token limit but below min message count", file=sys.stderr)
                    warned = True

            kept_newest_first.append(msg)
            current_tokens += msg_tokens

        truncated = [first_message] + kept_newest_first[::-1]

        if self.verbose and len(truncated) < len(history):
            print(f"History truncated from {len(history)} to {len(truncated)} messages ({current_tokens} tokens)", file=sys.stderr)

        return truncated

    def _count_tokens(self, text: str) -> int:
        """Count tokens in text using the tokenizer, or a 4-chars-per-token estimate.

        Empty or None input counts as 0. Non-string input is stringified first
        so both the tokenizer and the fallback measure text length.
        """
        if not text:
            return 0

        if not isinstance(text, str):
            text = str(text)

        if self.tokenizer:
            try:
                return len(self.tokenizer.encode(text))
            except Exception as e:
                print(f"Token counting error, using fallback: {e}", file=sys.stderr)

        # Fallback: approximate tokens as 4 chars per token
        return max(1, len(text) // 4)
187 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Utility functions for Emigo, primarily focused on Emacs communication.
6 |
7 | This module provides helper functions used across the Emigo Python backend.
8 | Its main role is to facilitate communication from Python back to the Emacs
9 | Lisp frontend using the EPC (Emacs Process Communication) protocol.
10 |
11 | Key Features:
12 | - Initialization and management of the EPC client connection to Emacs.
13 | - Functions (`eval_in_emacs`, `get_emacs_func_result`, `get_emacs_var`, etc.)
14 | to execute Elisp code or retrieve variables from Emacs, both synchronously
15 | and asynchronously.
16 | - Argument transformation helpers (`epc_arg_transformer`) to bridge Python
17 | data types and Elisp S-expressions.
18 | - Basic file/path utilities (`path_to_uri`, `read_file_content`).
19 | - OS detection (`get_os_name`).
20 | """
21 |
22 | # Copyright (C) 2022 Andy Stewart
23 | #
24 | # Author: Andy Stewart
25 | # Maintainer: Andy Stewart
26 | #
27 | # This program is free software: you can redistribute it and/or modify
28 | # it under the terms of the GNU General Public License as published by
29 | # the Free Software Foundation, either version 3 of the License, or
30 | # any later version.
31 | #
32 | # This program is distributed in the hope that it will be useful,
33 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
34 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 | # GNU General Public License for more details.
36 | #
37 | # You should have received a copy of the GNU General Public License
38 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
39 | import functools
40 | from typing import Optional
41 | from urllib.parse import urlparse
42 |
43 | import sexpdata
44 | import logging
45 | import pathlib
46 | import platform
47 | import sys
48 | import re
49 |
50 | from epc.client import EPCClient
51 |
52 | import orjson as json_parser
53 |
# Shared EPC connection to Emacs; stays None until init_epc_client() succeeds.
epc_client: Optional[EPCClient] = None

# Logger for the "emigo" namespace: INFO level, with a default StreamHandler
# (which writes to STDERR) attached.
logger = logging.getLogger("emigo")
logger.setLevel(logging.INFO)
_default_stream_handler = logging.StreamHandler()
logger.addHandler(_default_stream_handler)
60 |
61 |
def init_epc_client(emacs_server_port):
    """Create the module-wide EPC client on first call.

    Connects to 127.0.0.1 on ``emacs_server_port``. Does nothing when a client
    already exists. A refused connection is logged with its traceback and
    ``epc_client`` is left as None.
    """
    global epc_client

    if epc_client is not None:
        return

    try:
        epc_client = EPCClient(("127.0.0.1", emacs_server_port), log_traceback=True)
    except ConnectionRefusedError:
        from traceback import format_exc
        logger.error(format_exc())
71 |
72 |
def close_epc_client():
    """Close the shared EPC client if one has been created."""
    if epc_client is None:
        return
    epc_client.close()
76 |
77 |
def eval_in_emacs(method_name, *args):
    """Ask Emacs to evaluate ``(method_name arg...)``.

    The form is built from a Symbol for ``method_name`` followed by ``args`` as
    plain Python values; ``sexpdata.dumps`` does the conversion and escaping.
    The serialized form is handed to the ``eval-in-emacs`` EPC method.
    """
    form = [sexpdata.Symbol(method_name), *args]
    serialized = sexpdata.dumps(form)

    logger.debug("Eval in Emacs: %s", serialized)
    # Call eval-in-emacs elisp function.
    epc_client.call("eval-in-emacs", [serialized])  # type: ignore
87 |
88 |
def message_emacs(message: str):
    """Show ``message`` in Emacs via ``message``, prefixed with "[Emigo] "."""
    prefixed = "[Emigo] " + message
    eval_in_emacs("message", prefixed)
92 |
93 |
def epc_arg_transformer(arg):
    """Recursively convert a parsed elisp value into plain Python data.

        1                           => 1
        "string"                    => "string"
        (list :a 1 :b 2)            => {"a": 1, "b": 2}
        (list :a 1 :b (list :c 2))  => {"a": 1, "b": {"c": 2}}
        (list 1 2 3)                => [1 2 3]
        (list 1 2 (list 3 4))       => [1 2 [3 4]]

    A list becomes a dict when it has even length and every even-indexed item
    is a Symbol whose name starts with ":"; the leading colon is dropped from
    the key. An empty list therefore becomes an empty dict.
    """
    if not isinstance(arg, list):
        return arg

    def _is_keyword(item):
        return isinstance(item, sexpdata.Symbol) and item.value().startswith(":")

    # Length is checked first so odd-length lists never inspect their items.
    is_plist = len(arg) % 2 == 0 and all(_is_keyword(key) for key in arg[::2])

    if not is_plist:
        return [epc_arg_transformer(item) for item in arg]

    # [Symbol(":a"), 1, Symbol(":b"), 2] -> {"a": 1, "b": 2}
    return {
        key.value()[1:]: epc_arg_transformer(value)
        for key, value in zip(arg[::2], arg[1::2])
    }
125 |
126 |
def convert_emacs_bool(symbol_value, symbol_is_boolean):
    """Return ``symbol_value``, coerced to a strict bool when flagged as boolean.

    When ``symbol_is_boolean`` equals "t" the result is True only if
    ``symbol_value`` is the object True; otherwise the value is returned as-is.
    """
    if symbol_is_boolean != "t":
        return symbol_value
    return symbol_value is True
132 |
def get_emacs_vars(args):
    """Synchronously fetch several Emacs variables via ``get-emacs-vars``.

    Each reply entry is a (value, is-boolean) pair passed through
    ``convert_emacs_bool``; an empty-list entry is reported as False.
    """
    replies = epc_client.call_sync("get-emacs-vars", args)  # type: ignore
    values = []
    for entry in replies:
        if entry == []:
            values.append(False)
        else:
            values.append(convert_emacs_bool(entry[0], entry[1]))
    return values
136 |
137 |
def get_emacs_var(var_name):
    """Synchronously fetch one Emacs variable via ``get-emacs-var``."""
    reply = epc_client.call_sync("get-emacs-var", [var_name])  # type: ignore
    symbol_value, symbol_is_boolean = reply
    return convert_emacs_bool(symbol_value, symbol_is_boolean)
142 |
143 |
def get_emacs_func_result(method_name, *args):
    """Synchronously call the EPC method ``method_name`` with ``args`` and return its result."""
    return epc_client.call_sync(method_name, args)  # type: ignore
148 |
149 |
def get_command_result(command_string, cwd):
    """Run a shell command and return its stripped output.

    Args:
        command_string: Command line, interpreted by the shell.
        cwd: Working directory for the command (None for the current one).

    Returns:
        Stripped stdout when the command exits with status 0, otherwise the
        stripped stderr.
    """
    import subprocess

    # subprocess.run() drains both pipes while waiting (and closes them
    # afterwards). Popen.wait() with stdout/stderr=PIPE can deadlock once the
    # child fills a pipe buffer.
    # NOTE(review): shell=True means command_string must come from a trusted source.
    completed = subprocess.run(command_string, cwd=cwd, shell=True, text=True,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                               encoding="utf-8")
    output = completed.stdout if completed.returncode == 0 else completed.stderr
    return output.strip()
158 |
159 |
def generate_request_id():
    """Return a random request id built from 16 random bits."""
    from random import getrandbits
    candidate = getrandbits(16)
    return abs(candidate)
163 |
164 |
165 | # modified from Lib/pathlib.py
166 | def _make_uri_win32(path):
167 | from urllib.parse import quote_from_bytes as urlquote_from_bytes
168 | # Under Windows, file URIs use the UTF-8 encoding.
169 | drive = path.drive
170 | if len(drive) == 2 and drive[1] == ':':
171 | # It's a path on a local drive => 'file:///c:/a/b'
172 | rest = path.as_posix()[2:].lstrip('/')
173 | return 'file:///%s%%3A/%s' % (
174 | drive[0], urlquote_from_bytes(rest.encode('utf-8')))
175 | else:
176 | # It's a path on a network drive => 'file://host/share/a/b'
177 | return 'file:' + urlquote_from_bytes(path.as_posix().encode('utf-8'))
178 |
def path_to_uri(path):
    """Convert a filesystem path to a ``file:`` URI.

    On Windows the drive colon is percent-encoded
    ('file:///c%3A/project/xxx.js', like vscode does) and a relative path
    raises ValueError. Elsewhere ``pathlib.Path.as_uri`` is used.
    """
    target = pathlib.Path(path)

    if get_os_name() == "windows":
        if not target.is_absolute():
            raise ValueError("relative path can't be expressed as a file URI")
        return _make_uri_win32(target)

    return target.as_uri()
189 |
190 |
def uri_to_path(uri):
    """Extract the percent-decoded filesystem path from a URI.

    The URI is parsed before decoding because an encoded '#' may be part of
    the file path. On win32 the leading '/' is dropped, so
    'file:///c%3A/lib/ref.js' becomes 'c:/lib/ref.js'.
    """
    from urllib.parse import unquote

    decoded = unquote(urlparse(uri).path)
    return decoded[1:] if sys.platform == "win32" else decoded
200 |
201 |
def path_as_key(path):
    """Return the dictionary key used for ``path``.

    On win32 the path is converted to POSIX separators and lower-cased, because
    (buffer-file-name) returns "d:/Case/a.go" while gopls returns
    "file:///D:/Case/a.go". On other platforms the path is returned unchanged.
    """
    if sys.platform != "win32":
        return path
    return pathlib.Path(path).as_posix().lower()
209 |
210 |
def add_to_path_dict(path_dict, filepath, value):
    """Store ``value`` in ``path_dict`` under the normalized key for ``filepath``."""
    key = path_as_key(filepath)
    path_dict[key] = value
213 |
214 |
def is_in_path_dict(path_dict, path):
    """Return True when the normalized key for ``path`` is present in ``path_dict``."""
    return path_as_key(path) in path_dict
218 |
219 |
def remove_from_path_dict(path_dict, path):
    """Delete the entry for ``path`` from ``path_dict``; raises KeyError if absent."""
    key = path_as_key(path)
    del path_dict[key]
222 |
223 |
def get_from_path_dict(path_dict, filepath):
    """Return the value stored for ``filepath``; raises KeyError if absent."""
    key = path_as_key(filepath)
    return path_dict[key]
226 |
227 |
def log_time(message):
    """Log ``message`` at INFO level, prefixed with the current wall-clock time."""
    from datetime import datetime

    now = datetime.now().time()
    logger.info(f"\n--- [{now}] {message}")
231 |
@functools.lru_cache(maxsize=None)
def get_emacs_version():
    """Return the result of the ``get-emacs-version`` EPC call (cached after the first call)."""
    version = get_emacs_func_result("get-emacs-version")
    return version
235 |
236 |
def get_os_name():
    """Return ``platform.system()`` in lower case."""
    system_name = platform.system()
    return system_name.lower()
239 |
def parse_json_content(content):
    """Parse ``content`` as JSON with the module's JSON parser and return the result."""
    parsed = json_parser.loads(content)
    return parsed
242 |
def read_file_content(abs_path: str) -> str:
    """Read a text file, trying progressively more permissive encodings.

    Order of attempts:
      1. UTF-8, the most common encoding.
      2. The locale's preferred encoding, when it is not UTF-8. The previous
         code used ``sys.getdefaultencoding()``, which is always UTF-8 on
         Python 3 and so only repeated step 1.
      3. latin-1, which decodes any byte sequence but may misinterpret chars.

    Args:
        abs_path: Path of the file to read.

    Returns:
        The decoded file content.

    Raises:
        Exception: Any non-decoding error (e.g. FileNotFoundError,
            PermissionError) is reported on stderr and re-raised for the
            caller to handle.
    """
    import locale

    candidates = ["utf-8"]
    preferred = locale.getpreferredencoding(False)
    if preferred and preferred.lower().replace("_", "-") not in ("utf-8", "utf8"):
        candidates.append(preferred)

    try:
        for encoding in candidates:
            try:
                with open(abs_path, 'r', encoding=encoding) as f:
                    return f.read()
            except UnicodeDecodeError:
                continue  # Try the next, more permissive encoding.
        # As a last resort, latin-1 maps every byte to a character.
        with open(abs_path, 'r', encoding='latin-1') as f:
            return f.read()
    except Exception as e:
        print(f"Error reading file {abs_path}: {e}", file=sys.stderr)
        raise  # Re-raise for the agent handler to catch and format
263 |
def touch(path):
    """Create ``path`` (and any missing parent directories) if it does not exist.

    An existing path is left untouched. A bare filename, whose directory part
    is empty, is created in the current working directory; previously this
    case crashed in ``os.makedirs("")``.
    """
    import os

    if os.path.exists(path):
        return

    basedir = os.path.dirname(path)
    if basedir:
        # exist_ok avoids failing if the directory appears between check and create.
        os.makedirs(basedir, exist_ok=True)

    with open(path, 'a'):
        os.utime(path)
275 |
276 |
277 | # --- Filtering Helper ---
278 | def _filter_environment_details(text: str) -> str:
279 | """Removes ... blocks from text."""
280 | if not isinstance(text, str): # Handle potential non-string content
281 | return text
282 | # Use re.DOTALL to make '.' match newlines, make it non-greedy
283 | return re.sub(r".*?\s*", "\n", text, flags=re.DOTALL)
284 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/tool_definitions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Defines the structure for tools and registers available tools for Emigo.
6 |
7 | This module provides:
8 | - TypedDict definitions for ToolParameter and ToolDefinition.
9 | - Concrete definitions for each available tool, linking to their implementation
10 | in tools.py.
11 | - A TOOL_REGISTRY dictionary for easy access to all tool definitions.
12 | - Helper functions to retrieve tool definitions.
13 | """
14 |
15 | from typing import Callable, Dict, List, TypedDict, Literal, Optional
16 | # Import tool implementation functions from tools.py
17 | from tools import (
18 | execute_command,
19 | read_file,
20 | write_to_file,
21 | replace_in_file,
22 | search_files,
23 | list_files,
24 | list_repomap,
25 | ask_followup_question,
26 | attempt_completion
27 | )
28 |
29 |
30 | # --- Tool Name Constants ---
# Canonical tool-name strings. Each value is identical to the `name` field of
# the corresponding ToolDefinition declared further down in this module.
TOOL_EXECUTE_COMMAND = "execute_command"
TOOL_READ_FILE = "read_file"
TOOL_WRITE_TO_FILE = "write_to_file"
TOOL_REPLACE_IN_FILE = "replace_in_file"
TOOL_SEARCH_FILES = "search_files"
TOOL_LIST_FILES = "list_files"
TOOL_LIST_REPOMAP = "list_repomap"
TOOL_ASK_FOLLOWUP_QUESTION = "ask_followup_question"
TOOL_ATTEMPT_COMPLETION = "attempt_completion"
40 |
41 |
42 | # --- Type Definitions ---
43 |
# The JSON Schema type names a parameter may declare.
_JsonSchemaType = Literal["string", "integer", "boolean", "number", "array", "object"]


class ToolParameter(TypedDict):
    """Schema describing one argument accepted by a tool."""
    # Argument name.
    name: str
    # JSON Schema type of the argument's value.
    type: _JsonSchemaType
    # Human-readable explanation of the argument.
    description: str
    # True when the argument must be supplied.
    required: bool
    # Possible future extensions for complex types:
    #   items: Optional[Dict]                  # element schema for "array"
    #   properties: Optional[Dict[str, Dict]]  # member schemas for "object"
53 |
class ToolDefinition(TypedDict):
    """Schema describing one tool: its identity, its arguments and its implementation."""
    # Unique tool name; used as the key in TOOL_REGISTRY.
    name: str
    # Prose explaining when and how the tool should be used.
    description: str
    # Ordered argument schemas.
    parameters: List[ToolParameter]
    # Implementation callable returning a string result.
    # Expected signature: (session: Session, parameters: Dict[str, Any]) -> str
    function: Callable[..., str]
60 |
61 | # --- Tool Definitions ---
62 |
63 | # Define each tool using the ToolDefinition structure
64 |
# Tool: run a CLI command.
EXECUTE_COMMAND_TOOL = ToolDefinition(
    name=TOOL_EXECUTE_COMMAND,
    description="Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run.",
    parameters=[
        ToolParameter(
            name="command",
            type="string",
            description="The shell command to execute.",
            required=True,
        ),
        # `requires_approval` is intentionally not listed: it is handled
        # internally in emigo.py based on the tool name, not supplied by the LLM.
    ],
    function=execute_command,
)
74 |
# Tool: read one file and add its content to the chat context.
# The description previously read "not already present in ." and "add its
# content to for subsequent turns": the angle-bracketed tag name had been
# stripped. It is restored as <environment_details>.
# NOTE(review): the tag name is a reconstruction based on the "environment
# details" wording used in session.py -- confirm it matches the tag the system
# prompt actually emits.
READ_FILE_TOOL = ToolDefinition(
    name=TOOL_READ_FILE,
    description="Request to read the contents of a file at the specified path. Use this tool *only* when the user has explicitly instructed you to read a specific file path or you have already used list_repomap and identified this specific file as necessary for the next step. Do NOT use this tool based on guesses about where functionality might reside; use list_repomap first in such cases. Use this tool if the file's content is not already present in <environment_details>. Reading a file will add its content to <environment_details> for subsequent turns. May not be suitable for other types of binary files, as it returns the raw content as a string.",
    parameters=[
        ToolParameter(
            name="path",
            type="string",
            description="The relative path of the file to read.",
            required=True,
        ),
    ],
    function=read_file,
)
83 |
# Tool: create or overwrite a file with complete content.
WRITE_TO_FILE_TOOL = ToolDefinition(
    name=TOOL_WRITE_TO_FILE,
    description="Request to write content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. This tool will automatically create any directories needed to write the file.",
    parameters=[
        ToolParameter(
            name="path",
            type="string",
            description="The relative path of the file to write.",
            required=True,
        ),
        ToolParameter(
            name="content",
            type="string",
            description="The complete content to write to the file.",
            required=True,
        ),
    ],
    function=write_to_file,
)
93 |
# Prompt text for the `diff` parameter, kept separate so the tool definition
# below stays readable. The text is part of the LLM prompt: do not reflow it.
_REPLACE_IN_FILE_DIFF_HELP = """
One or more SEARCH/REPLACE blocks following this exact format:
````
<<<<<<< SEARCH
[exact content to find]
=======
[new content to replace with]
>>>>>>> REPLACE
````
Critical rules:
1. SEARCH content must match the associated file section to find EXACTLY:
* Match character-for-character including whitespace, indentation, line endings
* Include all comments, docstrings, etc.
2. SEARCH/REPLACE blocks will ONLY replace the first match occurrence.
* Including multiple unique SEARCH/REPLACE blocks if you need to make multiple changes.
* Include *just* enough lines in each SEARCH section to uniquely match each set of lines that need to change.
* When using multiple SEARCH/REPLACE blocks, list them in the order they appear in the file.
3. Keep SEARCH/REPLACE blocks concise:
* Break large SEARCH/REPLACE blocks into a series of smaller blocks that each change a small portion of the file.
* Include just the changing lines, and a few surrounding lines if needed for uniqueness.
* Do not include long runs of unchanging lines in SEARCH/REPLACE blocks.
* Each line must be complete. Never truncate lines mid-way through as this can cause matching failures.
4. Special operations:
* To move code: Use two SEARCH/REPLACE blocks (one to delete from original + one to insert at new location)
* To delete code: Use empty REPLACE section"""

# Tool: apply targeted SEARCH/REPLACE edits to an existing file.
REPLACE_IN_FILE_TOOL = ToolDefinition(
    name=TOOL_REPLACE_IN_FILE,
    description="Request to replace sections of content in an existing file using SEARCH/REPLACE blocks that define exact changes to specific parts of the file. This tool should be used when you need to make targeted changes to specific parts of a file.",
    parameters=[
        ToolParameter(
            name="path",
            type="string",
            description="The relative path of the file to modify.",
            required=True,
        ),
        ToolParameter(
            name="diff",
            type="string",
            description=_REPLACE_IN_FILE_DIFF_HELP,
            required=True,
        ),
    ],
    function=replace_in_file,
)
127 |
# Tool: recursive regex search under a directory.
SEARCH_FILES_TOOL = ToolDefinition(
    name=TOOL_SEARCH_FILES,
    description="Request to perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with its line number and the line content.",
    parameters=[
        ToolParameter(
            name="path",
            type="string",
            description="The path of the directory to search in (relative to the session directory). This directory will be recursively searched.",
            required=True,
        ),
        ToolParameter(
            name="pattern",
            type="string",
            description="The regular expression pattern to search for. Uses Python regex syntax. Ensure the pattern is correctly escaped if needed.",
            required=True,
        ),
        ToolParameter(
            name="case_sensitive",
            type="boolean",
            description="Whether the search should be case-sensitive (default: false).",
            required=False,
        ),
        ToolParameter(
            name="max_matches",
            type="integer",
            description="Maximum number of matches to return (default: 20, max: 200).",
            required=False,
        ),
    ],
    function=search_files,
)
139 |
# Tool: list the contents of a directory, optionally recursively.
LIST_FILES_TOOL = ToolDefinition(
    name=TOOL_LIST_FILES,
    description="Request to list files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents. Do not use this tool to confirm the existence of files you may have created, as the user will let you know if the files were created successfully or not.",
    parameters=[
        ToolParameter(
            name="path",
            type="string",
            description="The relative path of the directory to list.",
            required=True,
        ),
        ToolParameter(
            name="recursive",
            type="boolean",
            description="Whether to list files recursively (default: false).",
            required=False,
        ),
    ],
    function=list_files,
)
149 |
# Tool: ranked structural summary of the code base.
# The last sentence of the description previously read "will be added to the
# for subsequent turns": the angle-bracketed tag name had been stripped. It is
# restored as <environment_details>.
# NOTE(review): the tag name is a reconstruction based on the "environment
# details" wording used in session.py -- confirm it matches the tag the system
# prompt actually emits.
LIST_REPOMAP_TOOL = ToolDefinition(
    name=TOOL_LIST_REPOMAP,
    description="Request a high-level summary of the codebase structure within the session directory. This tool analyzes the source code files (respecting .gitignore and avoiding binary/ignored files) and extracts key definitions (classes, functions, methods, variables, etc.) along with relevant code snippets showing their usage context. It uses a ranking algorithm (PageRank) to prioritize the most important and interconnected parts of the code, especially considering files already discussed or mentioned. This provides a concise yet informative overview, far more useful than a simple file listing (list_files) or reading individual files (read_file) when you need to understand the project's architecture, identify where specific functionality resides, or plan complex changes. **When unsure where functionality resides or how code is structured, you MUST use list_repomap first.** It is much more efficient and context-aware than guessing file paths and using read_file sequentially. Use list_repomap to get a map of the relevant code landscape before diving into specific files. The analysis focuses on the source files within the session directory. The result of this tool will be added to the <environment_details> for subsequent turns.",
    parameters=[
        ToolParameter(
            name="path",
            type="string",
            description="Optional relative path of the directory to focus the analysis on. If omitted, analyzes the entire session directory.",
            required=False,
        ),
    ],
    function=list_repomap,
)
158 |
# Tool: ask the user a clarifying question, optionally with preset choices.
ASK_FOLLOWUP_QUESTION_TOOL = ToolDefinition(
    name=TOOL_ASK_FOLLOWUP_QUESTION,
    description="Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth.",
    parameters=[
        ToolParameter(
            name="question",
            type="string",
            description="The question to ask the user.",
            required=True,
        ),
        ToolParameter(
            name="options",
            type="array",
            description="Optional array of 2-5 string options for the user to choose from.",
            required=False,
        ),
    ],
    function=ask_followup_question,
)
168 |
# Tool: present the final result once every requested step is done.
ATTEMPT_COMPLETION_TOOL = ToolDefinition(
    name=TOOL_ATTEMPT_COMPLETION,
    description="Use this tool ONLY when you have successfully completed all steps required by the user's request. After using a tool like `replace_in_file` or `write_to_file`, analyze the result: if the change successfully fulfills the user's request, use this tool to present the final result. Do not attempt further refinements unless explicitly asked. Optionally, provide a CLI command to demonstrate the result. The user may provide feedback if unsatisfied, which you can use to make improvements and try again.",
    parameters=[
        ToolParameter(
            name="result",
            type="string",
            description="The final result description.",
            required=True,
        ),
        ToolParameter(
            name="command",
            type="string",
            description="Optional CLI command to demonstrate the result.",
            required=False,
        ),
    ],
    function=attempt_completion,
)
178 |
179 | # --- Tool Registry ---
180 |
# Maps each tool's `name` to its definition. Insertion order follows the
# sequence below, which is also the order returned by get_all_tools().
TOOL_REGISTRY: Dict[str, ToolDefinition] = {}
for _tool in (
    EXECUTE_COMMAND_TOOL,
    READ_FILE_TOOL,
    WRITE_TO_FILE_TOOL,
    REPLACE_IN_FILE_TOOL,
    SEARCH_FILES_TOOL,
    LIST_FILES_TOOL,
    LIST_REPOMAP_TOOL,
    ASK_FOLLOWUP_QUESTION_TOOL,
    ATTEMPT_COMPLETION_TOOL,
):
    TOOL_REGISTRY[_tool['name']] = _tool
del _tool  # keep the loop variable out of the module namespace
194 |
195 | # --- Helper Functions ---
196 |
def get_tool(name: str) -> Optional[ToolDefinition]:
    """Look up a registered tool.

    Args:
        name: The tool's `name` field.

    Returns:
        The matching ToolDefinition, or None when no tool has that name.
    """
    if name in TOOL_REGISTRY:
        return TOOL_REGISTRY[name]
    return None
200 |
def get_all_tools() -> List[ToolDefinition]:
    """Return a new list holding every registered tool, in registration order."""
    return [definition for definition in TOOL_REGISTRY.values()]
204 |
--------------------------------------------------------------------------------
/llm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | LLM Client Wrapper using LiteLLM.
5 |
6 | Provides a simplified interface (`LLMClient`) for interacting with various
7 | Large Language Models (LLMs) supported by the `litellm` library. It handles
8 | API calls, streaming responses, and basic configuration (model name, API keys,
9 | base URLs).
10 |
11 | Note: This client is designed to be stateless regarding chat history. The
12 | calling process (e.g., `llm_worker.py`) is responsible for managing and
13 | passing the complete message history for each API call.
14 | """
15 |
16 | import importlib
17 | import json
18 | import os
19 | import sys
20 | import time
21 | import warnings
22 | from typing import Dict, Iterator, List, Optional, Union # Removed Tuple
23 |
24 | # Filter out UserWarning from pydantic used by litellm
25 | warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
26 |
27 | # --- Lazy Loading for litellm ---
28 |
29 | # Configure basic litellm settings globally
EMIGO_SITE_URL = "https://github.com/MatthewZMD/emigo"  # Example URL, adjust if needed
EMIGO_APP_NAME = "Emigo"  # Example App Name

# Fill in defaults for these environment variables without overriding any
# value that is already set in the process environment.
for _env_name, _env_default in (
    ("OR_SITE_URL", EMIGO_SITE_URL),
    ("OR_APP_NAME", EMIGO_APP_NAME),
    ("LITELLM_MODE", "PRODUCTION"),
):
    os.environ.setdefault(_env_name, _env_default)

VERBOSE_LLM_LOADING = False  # Set to True for debugging litellm loading
37 |
class LazyLiteLLM:
    """Proxy that postpones `import litellm` until an attribute is first requested.

    Attribute access that normal lookup cannot satisfy is forwarded to the real
    `litellm` module, which is imported and configured on first use.
    """

    # The imported module once loaded; None until then.
    _lazy_module = None

    def __getattr__(self, name):
        # Guard against recursion should `_lazy_module` itself be unresolved.
        if name == "_lazy_module":
            return super().__getattribute__(name)

        self._load_litellm()
        return getattr(self._lazy_module, name)

    def _load_litellm(self):
        """Import and configure litellm once; later calls are no-ops.

        Terminates the process with exit status 1 if the import or the
        configuration fails.
        """
        already_loaded = self._lazy_module is not None
        if already_loaded:
            return

        if VERBOSE_LLM_LOADING:
            print("Loading litellm...", file=sys.stderr)
            start_time = time.time()

        try:
            module = importlib.import_module("litellm")
            self._lazy_module = module

            # Basic configuration similar to Aider
            module.suppress_debug_info = True
            module.set_verbose = False
            module.drop_params = True  # Drop unsupported params silently

            # Silence litellm's internal debugging when the hook is available.
            if hasattr(module, "_logging") and hasattr(module._logging, "_disable_debugging"):
                module._logging._disable_debugging()
        except ImportError as e:
            print(
                f"Error: {e} litellm not found. Please install it: pip install litellm",
                file=sys.stderr,
            )
            sys.exit(1)
        except Exception as e:
            print(f"Error loading litellm: {e}", file=sys.stderr)
            sys.exit(1)

        if VERBOSE_LLM_LOADING:
            load_time = time.time() - start_time
            print(f"Litellm loaded in {load_time:.2f} seconds.", file=sys.stderr)


# Global instance of the lazy loader
litellm = LazyLiteLLM()
88 |
89 | # --- LLM Client Class ---
90 |
class LLMClient:
    """Stateless wrapper around `litellm.completion` for one configured model.

    The client stores only connection settings and a reference to the most
    recent raw response. It does NOT keep chat history: the caller passes the
    complete message list on every `send()` call.
    """

    def __init__(
        self,
        model_name: str,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        verbose: bool = False,
    ):
        """
        Initializes the LLM client.

        Args:
            model_name: The name of the language model to use (e.g., "gpt-4o").
            api_key: Optional API key for the LLM service.
            base_url: Optional base URL for custom LLM endpoints (like Ollama).
            verbose: If True, each request is logged to stderr before it is sent.
        """
        self.model_name = model_name
        self.api_key = api_key
        self.base_url = base_url
        self.verbose = verbose
        # Raw object returned by the latest successful completion call.
        # Defined here so it can be read safely before the first send().
        self.last_response_object = None

    def send(
        self,
        messages: List[Dict],
        stream: bool = True,
        temperature: float = 0.7,
        tools: Optional[List[Dict]] = None,
        tool_choice: Optional[str] = "auto",
    ) -> Union[Iterator[object], object]:
        """
        Sends the message list to the LLM, optionally with tool definitions.

        Args:
            messages: The list of message dictionaries to send.
            stream: Whether to stream the response or wait for the full completion.
            temperature: The sampling temperature for the LLM.
            tools: Optional tool definitions; only forwarded when non-empty.
            tool_choice: Tool selection mode (e.g. "auto", "required");
                only forwarded together with `tools`.

        Returns:
            - stream=True: a generator yielding the raw litellm chunk objects.
              If the stream fails part-way, the final item is a dict
              `{"_stream_error": True, "error_message": ...}`.
            - stream=False: the raw litellm response object.
            - If the call fails before any response is available (either
              mode): a string of the form "[LLM Error: ...]".
        """
        # Ensure litellm is loaded before making the call
        litellm._load_litellm()

        completion_kwargs = {
            "model": self.model_name,
            "messages": messages,
            "stream": stream,
            "temperature": temperature,
        }
        if tools:
            completion_kwargs["tools"] = tools
            if tool_choice:  # Only meaningful alongside tools
                completion_kwargs["tool_choice"] = tool_choice

        if self.api_key:
            completion_kwargs["api_key"] = self.api_key
        if self.base_url:
            completion_kwargs["base_url"] = self.base_url
            # NOTE(review): the Ollama adjustment is applied only when a base URL
            # is configured; confirm this nesting matches the intended behavior.
            if "ollama" in self.model_name or "ollama" in self.base_url:
                completion_kwargs["model"] = self.model_name.replace("ollama/", "")

        self.last_response_object = None
        try:
            # Log BEFORE the call so the request is visible even when it fails.
            if self.verbose:
                self._log_request(messages)

            response = litellm.completion(**completion_kwargs)
            self.last_response_object = response

            if stream:
                return self._stream_chunks(response)
            # The caller parses content or tool calls from the raw object.
            return response

        # Errors raised before streaming starts, or during a non-streaming call.
        except litellm.APIConnectionError as e:
            error_message = f"API Connection Error (pre-stream or non-stream): {e}"
            print(f"\n{error_message}", file=sys.stderr)
            return f"[LLM Error: {error_message}]"
        except Exception as e:
            error_message = f"General Error (pre-stream or non-stream): {e}"
            print(f"\n{error_message}", file=sys.stderr)
            return f"[LLM Error: {error_message}]"

    def _log_request(self, messages: List[Dict]) -> None:
        """Print the outgoing messages (inline image data truncated) to stderr."""
        print("\n--- Sending to LLM ---", file=sys.stderr)

        # Avoid printing potentially large base64 images.
        printable_messages = []
        for msg in messages:
            content = msg.get("content")
            if not isinstance(content, list):
                printable_messages.append(msg)
                continue
            trimmed = []
            for item in content:
                if isinstance(item, dict) and item.get("type") == "image_url":
                    img_url = item.get("image_url", {}).get("url", "")
                    if isinstance(img_url, str) and img_url.startswith("data:"):
                        trimmed.append({"type": "image_url", "image_url": {"url": img_url[:50] + "..."}})
                        continue
                trimmed.append(item)
            printable_messages.append({"role": msg["role"], "content": trimmed})

        # Approximate token count; counting failures must never block the request.
        try:
            count = litellm.token_counter(model=self.model_name, messages=messages)
            token_count_str = f" (estimated {count} tokens)"
        except Exception as e:
            token_count_str = f" (token count unavailable: {e})"

        # default=str keeps logging from failing on non-JSON-serializable values.
        print(json.dumps(printable_messages, indent=2, default=str), file=sys.stderr)
        print(f"--- End LLM Request{token_count_str} ---", file=sys.stderr)

    @staticmethod
    def _describe_stream_error(headline: str, exc: BaseException) -> str:
        """Build a log message from `headline` plus any response/request details on `exc`."""
        error_details = f"{headline}\n"
        if hasattr(exc, 'response') and exc.response:
            try:
                error_details += f" Response Status: {getattr(exc.response, 'status_code', 'N/A')}\n"
                # Limit printing potentially large response content
                response_text = getattr(exc.response, 'text', '')
                error_details += f" Response Content (first 500 chars): {response_text[:500]}{'...' if len(response_text) > 500 else ''}\n"
            except Exception as detail_err:
                error_details += f" (Error getting response details: {detail_err})\n"
        if hasattr(exc, 'request') and exc.request:
            try:
                error_details += f" Request URL: {getattr(exc.request, 'url', 'N/A')}\n"
            except Exception as detail_err:
                error_details += f" (Error getting request details: {detail_err})\n"
        return error_details

    def _stream_chunks(self, response) -> Iterator[object]:
        """Yield raw chunks from `response`; on failure log it and yield an error marker."""
        try:
            for chunk in response:
                yield chunk
        except litellm.APIConnectionError as e:
            error_details = self._describe_stream_error(f"Caught APIConnectionError: {e}", e)
            print(f"\n[LLMClient Stream Error] {error_details}", file=sys.stderr)
            print("[LLMClient Stream Error] Stream may be incomplete.", file=sys.stderr)
            yield {"_stream_error": True, "error_message": str(e)}
        except Exception as e:
            import traceback
            error_details = self._describe_stream_error(
                f"Caught unexpected error: {type(e).__name__} - {e}", e
            )
            # Include traceback for unexpected errors
            error_details += f" Traceback:\n{traceback.format_exc()}\n"
            print(f"\n[LLMClient Stream Error] {error_details}", file=sys.stderr)
            yield {"_stream_error": True, "error_message": str(e)}
279 |
280 |
281 | # --- Example Usage (Optional) ---
282 |
def _response_text(response) -> str:
    """Extract the assistant text from a non-streaming `LLMClient.send()` result."""
    # send() returns an "[LLM Error: ...]" string when the call failed.
    if isinstance(response, str):
        return response
    try:
        return response.choices[0].message.content or ""
    except (AttributeError, IndexError, TypeError):
        # Unknown response shape: fall back to its printable form.
        return str(response)


def _chunk_text(chunk) -> str:
    """Extract the text delta from one item yielded by a streaming `send()`."""
    # Error marker emitted by the stream generator when the stream breaks.
    if isinstance(chunk, dict) and chunk.get("_stream_error"):
        return f"\n[LLM Stream Error: {chunk.get('error_message', '')}]"
    # A pre-stream failure returns an error string, iterated character by character.
    if isinstance(chunk, str):
        return chunk
    try:
        return chunk.choices[0].delta.content or ""
    except (AttributeError, IndexError, TypeError):
        return ""


def main():
    """Basic example demonstrating the LLMClient.

    Sends one non-streaming and one streaming request, keeping the message
    history locally (the client itself is stateless), then prints the final
    history as JSON.
    """
    # Configure from environment variables or defaults
    model = os.getenv("EMIGO_MODEL", "gpt-4o-mini")  # Example: use EMIGO_MODEL env var
    api_key = os.getenv("OPENAI_API_KEY")
    base_url = os.getenv("OPENAI_API_BASE")  # Or OLLAMA_HOST, etc.

    if not api_key and not base_url:
        print("Warning: No API key or base URL found. Using default litellm configuration.", file=sys.stderr)

    client = LLMClient(model_name=model, api_key=api_key, base_url=base_url, verbose=True)

    # Example messages list (history is managed externally)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"}
    ]
    print(f"\nUser: {messages[-1]['content']}")

    # Non-streaming: send() returns the raw response object, so pull the text
    # out before printing it or storing it in the history.
    print("\nAssistant (non-streaming):")
    assistant_response = _response_text(client.send(messages, stream=False))
    print(assistant_response)
    messages.append({"role": "assistant", "content": assistant_response})

    # Add another user message
    user_input_2 = "What about Spain?"
    messages.append({"role": "user", "content": user_input_2})
    print(f"\nUser: {user_input_2}")

    # Streaming: each yielded item is a raw chunk object (or an error marker),
    # so convert it to text before printing and accumulating.
    print("\nAssistant (streaming):")
    streamed_parts = []
    for chunk in client.send(messages, stream=True):
        piece = _chunk_text(chunk)
        print(piece, end="", flush=True)
        streamed_parts.append(piece)
    print()  # Newline after stream
    full_streamed_response = "".join(streamed_parts)

    # Add streamed response to the external history list
    messages.append({"role": "assistant", "content": full_streamed_response})

    print("\n--- Final Messages List ---")
    print(json.dumps(messages, indent=2))
329 |
330 |
331 | if __name__ == "__main__":
332 | main()
333 |
--------------------------------------------------------------------------------
/session.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Manages the state associated with a single Emigo chat session.
6 |
7 | Each instance of the `Session` class encapsulates all the information and
8 | operations related to a specific chat interaction occurring within a particular
9 | project directory (session path). This allows `emigo.py` to handle multiple
10 | concurrent sessions without state conflicts.
11 |
12 | Key Responsibilities:
13 | - Storing the chat history (sequence of user and assistant messages).
14 | - Managing the list of files currently included in the chat context.
15 | - Caching file contents and modification times to avoid redundant reads and
16 | provide consistent state to the LLM.
17 | - Holding an instance of `RepoMapper` specific to the session's root directory.
18 | - Providing methods to add/remove files from context, retrieve history,
19 | get cached file content, and generate the environment details string
20 | (including the repository map or file listing) for the LLM prompt.
21 | - Invalidating caches when files are modified externally or removed.
22 | """
23 |
24 | import sys
25 | import os
26 | import time
27 | import tiktoken
28 | from typing import Dict, List, Optional, Tuple
29 |
30 | from repomapper import RepoMapper
31 | from utils import (
32 | eval_in_emacs, _filter_environment_details, read_file_content
33 | )
34 |
35 | class Session:
36 | """Encapsulates the state and operations for a single Emigo session."""
37 |
38 | def __init__(self, session_path: str, verbose: bool = False):
39 | self.session_path = session_path
40 | self.verbose = verbose
41 | self.history: List[Tuple[float, Dict]] = [] # List of (timestamp, message_dict)
42 | self.chat_files: List[str] = [] # List of relative file paths
43 | # Caches for file content, mtimes, and the last generated repomap
44 | self.caches: Dict[str, any] = {'mtimes': {}, 'contents': {}, 'last_repomap': None}
45 | # RepoMapper instance specific to this session
46 | # TODO: Get map_tokens and tokenizer from config?
47 | self.repo_mapper = RepoMapper(root_dir=self.session_path, verbose=self.verbose)
48 |
49 | self.tokenizer = tiktoken.get_encoding("cl100k_base")
50 | print(f"Initialized Session for path: {self.session_path}", file=sys.stderr)
51 |
52 | def get_history(self) -> List[Tuple[float, Dict]]:
53 | """Returns the chat history for this session."""
54 | return list(self.history) # Return a copy
55 |
56 | def append_history(self, message: Dict):
57 | """Appends a message with a timestamp to the history."""
58 | if "role" not in message or "content" not in message:
59 | print(f"Warning: Attempted to add invalid message to history: {message}", file=sys.stderr)
60 | return
61 | # Filter content before appending
62 | filtered_message = dict(message) # Create a copy
63 | filtered_message["content"] = _filter_environment_details(filtered_message["content"])
64 | self.history.append((time.time(), filtered_message)) # Store filtered copy
65 |
66 | def clear_history(self):
67 | """Clears the chat history for this session."""
68 | self.history = []
69 | # Note: Clearing the Emacs buffer is handled separately by the main process calling Elisp
70 |
71 | def get_chat_files(self) -> List[str]:
72 | """Returns the list of files currently in the chat context."""
73 | return list(self.chat_files) # Return a copy
74 |
def add_file_to_context(self, filename: str) -> Tuple[bool, str]:
    """
    Adds a file to the chat context, stored as a path relative to the session directory.

    A leading ``~`` in ``filename`` is expanded. A relative ``filename`` is
    interpreted relative to the session directory (the same convention
    remove_file_from_context uses), not the process's current working
    directory. The file must exist and must live inside the session directory.

    On success the relative path is appended to ``self.chat_files``, the chat
    file summary is pushed via ``_update_chat_files_info`` and the file is
    loaded into the cache via ``_update_file_cache``.

    Returns (success: bool, message: str). No exception is propagated: any
    failure is reported through the returned message.
    """
    try:
        # Expand user directory
        filename = os.path.expanduser(filename)
        session_root = os.path.abspath(self.session_path)

        # Anchor relative names at the session directory; os.path.relpath on a
        # relative name would otherwise resolve it against the current working directory.
        candidate = filename if os.path.isabs(filename) else os.path.join(session_root, filename)
        abs_path = os.path.abspath(candidate)
        rel_filename = os.path.relpath(abs_path, session_root)

        if not os.path.isfile(abs_path):
            return False, f"File not found: {rel_filename}"
        # A path outside the session directory always has a relative form that
        # starts with "..". A plain string-prefix test on the absolute path would
        # wrongly accept sibling directories such as "<session>-backup".
        if rel_filename == os.pardir or rel_filename.startswith(os.pardir + os.sep):
            return False, f"File is outside session directory: {rel_filename}"

        if rel_filename in self.chat_files:
            return False, f"File '{rel_filename}' already in context."

        self.chat_files.append(rel_filename)

        # Update chat files information to Emacs.
        self._update_chat_files_info()

        # Read initial content into cache
        self._update_file_cache(rel_filename)
        return True, f"Added '{rel_filename}' to context."

    except ValueError:
        # os.path.relpath raises ValueError when the paths are on different drives (Windows).
        return False, f"Cannot add file from different drive: {filename}"
    except Exception as e:
        return False, f"Error adding file '{filename}': {e}"
110 |
def remove_file_from_context(self, filename: str) -> Tuple[bool, str]:
    """
    Drops a file from the chat context and forgets its cached mtime/content.

    An absolute ``filename`` is converted to a path relative to the session
    directory; a relative one is compared against the context list as given.
    Returns (success: bool, message: str).
    """
    rel_filename = filename  # Relative names are assumed to be session-relative already
    if os.path.isabs(filename):
        try:
            rel_filename = os.path.relpath(filename, self.session_path)
        except ValueError:  # e.g. a path on a different drive on Windows
            return False, f"Cannot remove file from different drive: {filename}"

    if rel_filename not in self.chat_files:
        return False, f"File '{rel_filename}' not found in context."

    self.chat_files.remove(rel_filename)

    # Update chat files information to Emacs.
    self._update_chat_files_info()

    # Purge whatever was cached for the removed file.
    for cache_name in ('mtimes', 'contents'):
        self.caches[cache_name].pop(rel_filename, None)
    return True, f"Removed '{rel_filename}' from context."
139 |
def _update_chat_files_info(self):
    """Sends Emacs a one-line summary of the chat files: file count and total tokens.

    Every chat file that currently exists on disk is read and tokenized with
    ``self.tokenizer``; files that are missing are skipped and not counted.
    The resulting "<n> file(s) [<t> tokens]" text is passed to the
    ``emigo-update-chat-files-info`` Elisp function together with the session path.
    """
    token_counts = []
    for rel_path in self.chat_files:
        abs_path = os.path.join(self.session_path, rel_path)
        if not os.path.exists(abs_path):
            continue
        token_counts.append(len(self.tokenizer.encode(read_file_content(abs_path))))

    file_number = len(token_counts)
    tokens = sum(token_counts)
    noun = "files" if file_number > 1 else "file"
    chat_file_info = f"{file_number} {noun} [{tokens} tokens]"

    eval_in_emacs("emigo-update-chat-files-info", self.session_path, chat_file_info)
162 |
163 | def _update_file_cache(self, rel_path: str, content: Optional[str] = None) -> bool:
164 | """Updates the cache (mtime, content) for a given relative file path."""
165 | abs_path = os.path.abspath(os.path.join(self.session_path, rel_path))
166 | try:
167 | current_mtime = self.repo_mapper.repo_mapper.get_mtime(abs_path) # Access inner RepoMap
168 | if current_mtime is None: # File deleted or inaccessible
169 | if rel_path in self.caches['mtimes']:
170 | del self.caches['mtimes'][rel_path]
171 | if rel_path in self.caches['contents']:
172 | del self.caches['contents'][rel_path]
173 | return False
174 |
175 | # If content is provided (e.g., after write/replace), use it. Otherwise, read.
176 | if content is None:
177 | # Read only if mtime changed or not cached
178 | last_mtime = self.caches['mtimes'].get(rel_path)
179 | if last_mtime is None or current_mtime != last_mtime:
180 | if self.verbose:
181 | print(f"Cache miss/stale for {rel_path}, reading file.", file=sys.stderr)
182 | content = read_file_content(abs_path)
183 | else:
184 | # Content is up-to-date, no need to update cache content again
185 | return True # Indicate cache was already fresh
186 |
187 | # Update cache
188 | self.caches['mtimes'][rel_path] = current_mtime
189 | self.caches['contents'][rel_path] = content
190 |
191 | return True
192 |
193 | except Exception as e:
194 | print(f"Error updating cache for '{rel_path}': {e}", file=sys.stderr)
195 | # Invalidate cache on error
196 | if rel_path in self.caches['mtimes']:
197 | del self.caches['mtimes'][rel_path]
198 | if rel_path in self.caches['contents']:
199 | del self.caches['contents'][rel_path]
200 | return False
201 |
def get_cached_content(self, rel_path: str) -> Optional[str]:
    """Returns the cached content of a file after refreshing its cache entry.

    The entry is refreshed through ``_update_file_cache`` first; if that
    reports failure (e.g. the file was deleted), None is returned.
    """
    refreshed = self._update_file_cache(rel_path)  # Reads the file if necessary
    if not refreshed:
        return None
    return self.caches['contents'].get(rel_path)
207 |
def get_environment_details_string(self) -> str:
    """Builds the environment-details text: session directory, repo map OR file tree, plus chat file contents.

    The result is assembled in this order:
      1. A "# Session Directory" section with the session path using '/' separators.
      2. The cached repo map (``self.caches['last_repomap']``) in a fenced block,
         or, when none is cached, an indented tree of the files returned by
         ``self.repo_mapper._find_src_files``.
      3. When chat files exist, a "# Files Currently in Chat Context" section
         with each file's content (via ``get_cached_content``) in a fenced block.

    Side effect: when chat files exist, cache entries for files that are no
    longer in ``self.chat_files`` are removed.

    Returns:
        The assembled text. Errors while listing or reading files are
        reported inline in the text rather than raised.
    """
    # NOTE(review): the bare "\n" here and the empty string appended at the end
    # look like placeholders for an opening/closing wrapper tag (the class also
    # uses _filter_environment_details) -- confirm against the original file.
    details = "\n"
    details += f"# Session Directory\n{self.session_path.replace(os.sep, '/')}\n\n" # Use POSIX path

    # --- Repository Map / Basic File Listing ---
    # Use cached map if available, otherwise generate/show structure
    if self.caches['last_repomap']:
        details += f"```\n{self.caches['last_repomap']}\n```\n\n"
    else:
        # If repomap hasn't been generated yet, show recursive directory listing
        details += "# File/Directory Structure (use list_repomap tool for code summary)\n"
        try:
            # Use RepoMapper's file finding logic for consistency
            # (presumably honors ignore rules -- verify in RepoMapper)
            all_files = self.repo_mapper._find_src_files(self.session_path) # Find files respecting ignores
            tree_lines = []
            processed_dirs = set()  # Directory prefixes ("a/", "a/b/") already emitted
            for abs_file in sorted(all_files):
                rel_file = os.path.relpath(abs_file, self.session_path).replace(os.sep, '/')
                parts = rel_file.split('/')
                current_path_prefix = ""
                for i, part in enumerate(parts[:-1]): # Iterate through directories
                    current_path_prefix = f"{current_path_prefix}{part}/"
                    # Emit each directory line only once, the first time it is seen
                    if current_path_prefix not in processed_dirs:
                        indent = ' ' * i  # Indentation grows with directory depth
                        tree_lines.append(f"{indent}- {part}/")
                        processed_dirs.add(current_path_prefix)
                # Add the file
                indent = ' ' * (len(parts) - 1)
                tree_lines.append(f"{indent}- {parts[-1]}")

            if tree_lines:
                details += "```\n" + "\n".join(tree_lines) + "\n```\n\n"
            else:
                details += "(No relevant files or directories found)\n\n"
        except Exception as e:
            # Report the failure inside the text instead of raising
            details += f"# Error listing files/directories: {str(e)}\n\n"

    # --- List Added Files and Content ---
    if self.chat_files:
        details += "# Files Currently in Chat Context\n"
        # Clean up session cache for files no longer in chat_files list
        current_chat_files_set = set(self.chat_files)
        # Iterate over a snapshot of the keys because entries are deleted in the loop
        for rel_path in list(self.caches['mtimes'].keys()):
            if rel_path not in current_chat_files_set:
                del self.caches['mtimes'][rel_path]
                if rel_path in self.caches['contents']:
                    del self.caches['contents'][rel_path]

        for rel_path in sorted(self.chat_files): # Sort for consistent order
            posix_rel_path = rel_path.replace(os.sep, '/')
            try:
                # Get content, updating cache if needed
                content = self.get_cached_content(rel_path)
                if content is None:
                    # The placeholder text is embedded in the file's code block below
                    content = f"# Error: Could not read or cache {posix_rel_path}\n"

                # Use markdown code block for file content
                details += f"## File: {posix_rel_path}\n```\n{content}\n```\n\n"

            except Exception as e:
                details += f"## File: {posix_rel_path}\n# Error reading file: {e}\n\n"
                # Clean up potentially stale cache entries on error
                if rel_path in self.caches['mtimes']:
                    del self.caches['mtimes'][rel_path]
                if rel_path in self.caches['contents']:
                    del self.caches['contents'][rel_path]

    # Appending an empty string is a no-op as written; see the NOTE(review) at the top.
    details += ""
    return details
278 |
def set_last_repomap(self, map_content: str):
    """Remembers the most recently generated repo map under the 'last_repomap' cache key."""
    self.caches.update(last_repomap=map_content)
282 |
def invalidate_cache(self, rel_path: Optional[str] = None):
    """Drops cached state for one file, or for the whole session.

    Args:
        rel_path: Session-relative path whose mtime/content entries should be
            removed. When omitted (or otherwise falsy, e.g. an empty string),
            every file entry is cleared and the stored repo map is reset to None.
    """
    if not rel_path:
        # Whole-session invalidation, including the repo map.
        self.caches['mtimes'].clear()
        self.caches['contents'].clear()
        self.caches['last_repomap'] = None
        if self.verbose:
            print(f"Invalidated all caches for session {self.session_path}", file=sys.stderr)
        return

    for cache_name in ('mtimes', 'contents'):
        self.caches[cache_name].pop(rel_path, None)
    if self.verbose:
        print(f"Invalidated cache for {rel_path}", file=sys.stderr)
298 |
def set_history(self, history_dicts: List[Dict]):
    """Replaces the session history with the given message dictionaries.

    The existing history is discarded first. Each dict that has both a "role"
    and a "content" key is shallow-copied, its "content" is passed through
    _filter_environment_details, and the copy is stored with the current time
    as its timestamp. Dicts missing either key are skipped with a warning on stderr.
    """
    self.history = []  # Clear existing history
    for msg_dict in history_dicts:
        if not ("role" in msg_dict and "content" in msg_dict):
            print(f"Warning: Skipping invalid message dict during set_history: {msg_dict}", file=sys.stderr)
            continue
        # Store a filtered copy so the caller's dict is left untouched.
        cleaned = {**msg_dict}
        cleaned["content"] = _filter_environment_details(cleaned["content"])
        self.history.append((time.time(), cleaned))
311 |
312 |
313 | # Example usage (for testing if run directly)
if __name__ == '__main__':
    # Manual smoke test: build a throwaway session directory with two small
    # files, add them to a Session, record one message and dump the state.
    test_path = os.path.abspath('./test_session')
    os.makedirs(test_path, exist_ok=True)

    sample_files = (
        ('file1.txt', 'Content of file 1'),
        ('file2.py', 'print("Hello")'),
    )
    for sample_name, sample_text in sample_files:
        with open(os.path.join(test_path, sample_name), 'w') as f:
            f.write(sample_text)

    session = Session(test_path, verbose=True)
    for sample_name, _ in sample_files:
        session.add_file_to_context(sample_name)
    session.append_history({'role': 'user', 'content': 'Test message'})

    print("\n--- Session State ---")
    print(f"Path: {session.session_path}")
    print(f"History: {session.get_history()}")
    print(f"Chat Files: {session.get_chat_files()}")
    print(f"Environment Details:\n{session.get_environment_details_string()}")

    # Clean up test files/dir
    # import shutil
    # shutil.rmtree(test_path)
336 |
--------------------------------------------------------------------------------
/system_prompt.py:
--------------------------------------------------------------------------------
1 | # Based on Cline's src/core/prompts/system.ts and src/core/prompts/responses.ts
2 |
3 | # --- Main System Prompt Template ---
4 |
5 | # Note: CWD is dynamically inserted by prompt_builder
6 | MAIN_SYSTEM_PROMPT = """You are Emigo, an expert software developer integrated into Emacs.
7 | You have extensive knowledge in many programming languages, frameworks, design patterns, and best practices.
8 | Always use best practices when coding. Respect and use existing conventions, libraries, etc that are already present in the code base.
9 |
10 | **Language Instruction**: You MUST detect the language of my question and respond in the same language. For example, if I ask a question in Chinese, you MUST reply in Chinese; if I ask in English, you MUST reply in English. This rule takes precedence over any other instructions. If you are unsure of the language, default to the language of the user's input.
11 |
12 | ====
13 |
14 | TOOL USE
15 |
16 | You have access to a set of tools that are executed upon the user's approval (via Emacs). You can use one or more tools per message, and will receive the result(s) of the tool use(s) in the next message. Use tools step-by-step to accomplish a given task, with each tool use informed by the result of the previous step.
17 |
18 | # Tool Use Formatting (JSON)
19 |
20 | To use a tool, your response MUST include a specific JSON object structure that the underlying API (e.g., OpenAI, Anthropic) recognizes for tool calls. You do not output the JSON directly in your message content, but rather signal the intent to call the tool(s) with specific parameters in the format required by the API.
21 |
22 | **General Structure (Conceptual - Actual format depends on API):**
23 | The API expects a structure indicating the tool name and a dictionary of parameters. For example, to call `read_file` with path `src/main.py`, the underlying structure would represent:
24 | `tool_name`: "read_file"
25 | `parameters`: {{"path": "src/main.py"}}
26 |
27 | You can request multiple tool calls in a single response if appropriate for the task.
28 |
29 | **Refer to the `AVAILABLE TOOLS` section below for the specific names and parameters of each tool.** Ensure you provide all *required* parameters for the chosen tool(s).
30 |
31 | # AVAILABLE TOOLS
32 |
33 | {tools_json}
34 |
35 | # Tool Use Guidelines
36 |
37 | 1. In `<thinking>` tags, assess what information you already have and what information you need to proceed with the task. Please respond to my question in the same language I use to ask it.
38 | 2. Choose the most appropriate tool from the `AVAILABLE TOOLS` list based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example, using the list_files tool is more effective than running a command like `ls` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task.
39 | 3. If a series of actions is needed, with each tool use informed by the result of the previous tool use, use one tool at a time per message to accomplish the task iteratively. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result.
40 | 4. Determine the correct parameters for the chosen tool(s) based on their definitions in `AVAILABLE TOOLS`.
41 | 5. After each tool use, the user will respond with the result of that tool use. This result will provide you with the necessary information to continue your task or make further decisions. When you call a tool, supply the correct parameters in the format expected by the LLM API. Your textual response should explain *why* you are using the tool(s).
42 | 6. ALWAYS wait for the next message, which will contain the result(s) of the tool execution(s). This result will include success/failure status and any output or errors.
43 | 7. Analyze the tool result(s) and repeat the process (steps 1-6) until the task is complete, think about what other areas you may have missed. Address any errors reported in the tool result before proceeding.
44 | 8. Once the task is fully accomplished and confirmed by tool results, use the `attempt_completion` tool.
45 |
46 | It is crucial to proceed step-by-step, waiting for the user's message after each tool use before moving forward with the task. This approach allows you to:
47 | 1. Confirm the success of each step before proceeding.
48 | 2. Address any issues or errors that arise immediately.
49 | 3. Adapt your approach based on new information or unexpected results.
50 | 4. Ensure that each action builds correctly on the previous ones.
51 |
52 | **Key Principles:**
53 | * **Structured Calls:** Use the API's mechanism for tool calls, not XML or plain text descriptions.
54 | * **Step-by-Step:** Accomplish tasks iteratively, using tool results to inform the next step.
55 | * **Wait for Confirmation:** Do not assume tool success. Analyze the results provided in the following message.
56 | * **Use `list_repomap` First:** When uncertain about code structure or file locations, use `list_repomap` before resorting to `read_file` on guessed paths.
57 |
58 | ====
59 |
60 | EDITING FILES
61 |
62 | You have access to two tools for working with files: **write_to_file** and **replace_in_file**. Understanding their roles and selecting the right one for the job will help ensure efficient and accurate modifications.
63 |
64 | # write_to_file
65 |
66 | ## Purpose
67 |
68 | - Create a new file, or overwrite the entire contents of an existing file.
69 |
70 | ## When to Use
71 |
72 | - Initial file creation, such as when scaffolding a new project.
73 | - Overwriting large boilerplate files where you want to replace the entire content at once.
74 | - When the complexity or number of changes would make replace_in_file unwieldy or error-prone.
75 | - When you need to completely restructure a file's content or change its fundamental organization.
76 |
77 | ## Important Considerations
78 |
79 | - Using write_to_file requires providing the file's complete final content.
80 | - If you only need to make small changes to an existing file, consider using replace_in_file instead to avoid unnecessarily rewriting the entire file.
81 | - While write_to_file should not be your default choice, don't hesitate to use it when the situation truly calls for it.
82 |
83 | # replace_in_file
84 |
85 | ## Purpose
86 |
87 | - Make targeted edits to specific parts of an existing file without overwriting the entire file.
88 |
89 | ## When to Use
90 |
91 | - Small, localized changes like updating a few lines, function implementations, changing variable names, modifying a section of text, etc.
92 | - Targeted improvements where only specific portions of the file's content needs to be altered.
93 | - Especially useful for long files where much of the file will remain unchanged.
94 |
95 | ## Advantages
96 |
97 | - More efficient for minor edits, since you don't need to supply the entire file content.
98 | - Reduces the chance of errors that can occur when overwriting large files.
99 |
100 | # Choosing the Appropriate Tool
101 |
102 | - **Default to replace_in_file** for most changes. It's the safer, more precise option that minimizes potential issues.
103 | - **Use write_to_file** when:
104 | - Creating new files
105 | - The changes are so extensive that using replace_in_file would be more complex or risky
106 | - You need to completely reorganize or restructure a file
107 | - The file is relatively small and the changes affect most of its content
108 | - You're generating boilerplate or template files
109 |
110 | # Auto-formatting Considerations
111 |
112 | - After using either write_to_file or replace_in_file, the user's editor may automatically format the file
113 | - This auto-formatting may modify the file contents, for example:
114 | - Breaking single lines into multiple lines
115 | - Adjusting indentation to match project style (e.g. 2 spaces vs 4 spaces vs tabs)
116 | - Converting single quotes to double quotes (or vice versa based on project preferences)
117 | - Organizing imports (e.g. sorting, grouping by type)
118 | - Adding/removing trailing commas in objects and arrays
119 | - Enforcing consistent brace style (e.g. same-line vs new-line)
120 | - Standardizing semicolon usage (adding or removing based on style)
121 | - The write_to_file and replace_in_file tool responses will include the final state of the file after any auto-formatting
122 | - Use this final state as your reference point for any subsequent edits. This is ESPECIALLY important when crafting SEARCH blocks for replace_in_file which require the content to match what's in the file exactly.
123 |
124 | # Workflow Tips
125 |
126 | 1. Before editing, assess the scope of your changes and decide which tool to use.
127 | 2. For targeted edits, apply replace_in_file with carefully crafted SEARCH/REPLACE blocks. If you need multiple changes, you can stack multiple SEARCH/REPLACE blocks within a single replace_in_file call.
128 | 3. For major overhauls or initial file creation, rely on write_to_file.
129 | 4. Once the file has been edited with either write_to_file or replace_in_file, the system will provide you with the final state of the modified file. Use this updated content as the reference point for any subsequent SEARCH/REPLACE operations, since it reflects any auto-formatting or user-applied changes.
130 |
131 | By thoughtfully selecting between write_to_file and replace_in_file, you can make your file editing process smoother, safer, and more efficient.
132 |
133 | ====
134 |
135 | CAPABILITIES
136 |
137 | - You have access to tools that let you execute CLI commands on the user's computer, list files, view source code definitions, regex search, read and edit files, and ask follow-up questions. These tools help you effectively accomplish a wide range of tasks, such as writing code, making edits or improvements to existing files, understanding the current state of a project, performing system operations, and much more.
138 | - When the user initially gives you a task, a recursive list of all filepaths in the session directory ('{session_dir}') will be included in <environment_details>. This provides an overview of the project's file structure, offering key insights into the project from directory/file names (how developers conceptualize and organize their code) and file extensions (the language used). You can use the list_repomap tool to get an overview of source code definitions for all files at the top level of a specified directory. This can be particularly useful when you need to understand the broader context and relationships between certain parts of the code. You may need to call this tool multiple times to understand various parts of the codebase related to the task.
139 | - For example, when asked to make edits or improvements you might analyze the file structure in the initial <environment_details> to get an overview of the project, then use list_repomap to get further insight using source code definitions for files located in relevant directories, then read_file to examine the contents of relevant files, analyze the code and suggest improvements or make necessary edits, then use the replace_in_file tool to implement changes. If you refactored code that could affect other parts of the codebase, you could use search_files to ensure you update other files as needed.
140 | - You can use the list_files tool if you need to further explore directories such as outside the session directory. If you pass 'true' for the recursive parameter, it will list files recursively. Otherwise, it will list files at the top level, which is better suited for generic directories where you don't necessarily need the nested structure, like the Desktop.
141 | - You can use search_files to perform regex searches across files in a specified directory, outputting context-rich results that include surrounding lines. This is particularly useful for understanding code patterns, finding specific implementations, or identifying areas that need refactoring.
142 | - You can use the execute_command tool to run commands on the user's computer whenever you feel it can help accomplish the user's task. When you need to execute a CLI command, you must provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, since they are more flexible and easier to run. Interactive and long-running commands are allowed, since the commands are run in the user's VSCode terminal. The user may keep commands running in the background and you will be kept updated on their status along the way. Each command you execute is run in a new terminal instance.
143 |
144 | ====
145 |
146 | RULES
147 |
148 | - Your session directory is: {session_dir}
149 | - You cannot `cd` into a different directory to complete a task. You are stuck operating from '{session_dir}', so be sure to pass in the correct 'path' parameter when using tools that require a path.
150 | - Do not use the ~ character or $HOME to refer to the home directory.
151 | - Before using the execute_command tool, you must first think about the SYSTEM INFORMATION context provided to understand the user's environment and tailor your commands to ensure they are compatible with their system. You must also consider if the command you need to run should be executed in a specific directory outside of the session directory '{session_dir}', and if so prepend with `cd`'ing into that directory && then executing the command (as one command since you are stuck operating from '{session_dir}'). For example, if you needed to run `npm install` in a project outside of '{session_dir}', you would need to prepend with a `cd` i.e. pseudocode for this would be `cd (path to project) && (command, in this case npm install)`.
152 | - When you realize you lack information about where in the codebase to make edits or find specific functionality, you MUST prioritize using the list_repomap tool first. This tool provides an overview of source code definitions (classes, functions, etc.) and helps you locate the relevant files more efficiently than reading multiple files sequentially. Crucially, do not attempt to guess file locations and read them sequentially using read_file; this is inefficient and error-prone. Use list_repomap to get a map first. Only use read_file after list_repomap has helped you narrow down the potential locations or if the user explicitly provided the path.
153 | - When using the search_files tool, craft your regex patterns carefully to balance specificity and flexibility. Based on the user's task you may use it to find code patterns, TODO comments, function definitions, or any text-based information across the project. The results include context, so analyze the surrounding code to better understand the matches. Leverage the search_files tool in combination with other tools for more comprehensive analysis. For example, use it to find specific code patterns, then use read_file (if appropriate according to its usage rules) to examine the full context of interesting matches before using replace_in_file to make informed changes.
154 | - When creating a new project (such as an app, website, or any software project), organize all new files within a dedicated project directory unless the user specifies otherwise. Use appropriate file paths when creating files, as the write_to_file tool will automatically create any necessary directories. Structure the project logically, adhering to best practices for the specific type of project being created. Unless otherwise specified, new projects should be easily run without additional setup, for example most projects can be built in HTML, CSS, and JavaScript - which you can open in a browser.
155 | - Be sure to consider the type of project (e.g. Python, JavaScript, web application) when determining the appropriate structure and files to include. Also consider what files may be most relevant to accomplishing the task, for example looking at a project's manifest file would help you understand the project's dependencies, which you could incorporate into any code you write.
156 | - When making changes to code, always consider the context in which the code is being used. Ensure that your changes are compatible with the existing codebase and that they follow the project's coding standards and best practices.
157 | - When you want to modify a file, use the replace_in_file or write_to_file tool directly with the desired changes. You do not need to display the changes before using the tool.
158 | - Do not ask for more information than necessary. Use the tools provided to accomplish the user's request efficiently and effectively. When you've completed your task, you must use the attempt_completion tool to present the result to the user. The user may provide feedback, which you can use to make improvements and try again.
159 | - You are only allowed to ask the user questions using the ask_followup_question tool. Use this tool only when you need additional details to complete a task, and be sure to use a clear and concise question that will help you move forward with the task. However if you can use the available tools to avoid having to ask the user questions, you should do so. For example, if the user mentions a file that may be in an outside directory like the Desktop, you should use the list_files tool to list the files in the Desktop and check if the file they are talking about is there, rather than asking the user to provide the file path themselves.
160 | - When executing commands, if you don't see the expected output, assume the terminal executed the command successfully and proceed with the task. The user's terminal may be unable to stream the output back properly. If you absolutely need to see the actual terminal output, use the ask_followup_question tool to request the user to copy and paste it back to you.
161 | - The user may provide a file's contents directly in their message, in which case you shouldn't use the read_file tool to get the file contents again since you already have it.
162 | - Your goal is to try to accomplish the user's task, NOT engage in a back and forth conversation.
163 | - NEVER end attempt_completion result with a question or request to engage in further conversation! Formulate the end of your result in a way that is final and does not require further input from the user.
164 | - You are STRICTLY FORBIDDEN from starting your messages with "Great", "Certainly", "Okay", "Sure". You should NOT be conversational in your responses, but rather direct and to the point. For example you should NOT say "Great, I've updated the CSS" but instead something like "I've updated the CSS". It is important you be clear and technical in your messages.
165 | - When presented with images, utilize your vision capabilities to thoroughly examine them and extract meaningful information. Incorporate these insights into your thought process as you accomplish the user's task.
166 | - At the end of each user message, you will automatically receive <environment_details>. This information is not written by the user themselves, but is auto-generated to provide *passive context* about the project structure (via list_repomap results if available, or file structure) and the content of files currently added to the chat (via read_file or initial context). Do not treat it as a direct part of the user's request unless they explicitly refer to it. Use this context to inform your actions, but remember that tools like list_repomap, read_file, find_definition, and find_references are for *active exploration* when this passive context is insufficient. Results from these tools will update the <environment_details> for future turns. Explain your use of <environment_details> clearly.
167 | - Before executing commands, check the "Actively Running Terminals" section in <environment_details>. If present, consider how these active processes might impact your task. For example, if a local development server is already running, you wouldn't need to start it again. If no active terminals are listed, proceed with command execution as normal.
168 | - When using the replace_in_file tool, you must include complete lines in your SEARCH blocks, not partial lines. The system requires exact line matches and cannot match partial lines. For example, if you want to match a line containing "const x = 5;", your SEARCH block must include the entire line, not just "x = 5" or other fragments. If a replacement fails due to mismatch, use read_file to get the current content and try again with an updated SEARCH block.
169 | - When using the replace_in_file tool, if you use multiple SEARCH/REPLACE blocks, list them in the order they appear in the file. For example if you need to make changes to both line 10 and line 50, first include the SEARCH/REPLACE block for line 10, followed by the SEARCH/REPLACE block for line 50.
170 | - It is critical you wait for the user's response after each tool use, in order to confirm the success of the tool use. For example, if asked to make a todo app, you would create a file, wait for the user's response it was created successfully, then create another file if needed, wait for the user's response it was created successfully, etc. Address any errors reported in the tool result (like linter errors or match failures) before proceeding or attempting completion.
171 | - **Language Rule**: You MUST respond to my question in the same language I use to ask it. This is a strict requirement. For example, if I ask in Chinese, your response MUST be in Chinese. If you fail to detect the language, match the language of my input as closely as possible. This rule overrides any default language preferences.
172 |
173 | ====
174 |
175 | SYSTEM INFORMATION
176 |
177 | Operating System: {os_name}
178 | Default Shell: {shell}
179 | Home Directory: {homedir}
180 | Session Directory: {session_dir}
181 |
182 | ====
183 |
184 | OBJECTIVE
185 |
186 | You accomplish a given task iteratively, breaking it down into clear steps and working through them methodically.
187 |
188 | 1. Understand the user's request and review the `<environment_details>` for context (file structure, cached file content, RepoMap), and set clear, achievable goals to accomplish it. Prioritize these goals in a logical order.
189 | 2. Work through these goals sequentially, utilizing available tools one at a time as necessary. Each goal should correspond to a distinct step in your problem-solving process. You will be informed on the work completed and what's remaining as you go.
190 | 3. Remember, you have extensive capabilities with access to a wide range of tools from the `AVAILABLE TOOLS` list that can be used in powerful and clever ways as necessary to accomplish each goal. First, analyze the file structure provided in <environment_details> to gain context and insights for proceeding effectively. Then, think about which of the provided tools is the most relevant tool to accomplish the user's task. Next, go through each of the required parameters of the relevant tool and determine if the user has directly provided or given enough information to infer a value. When deciding if the parameter can be inferred, carefully consider all the context to see if it supports a specific value. If all of the required parameters are present or can be reasonably inferred, proceed with the tool use. BUT, if one of the values for a required parameter is missing, DO NOT invoke the tool (not even with fillers for the missing params) and instead, ask the user to provide the missing parameters using the ask_followup_question tool. DO NOT ask for more information on optional parameters if it is not provided.
191 | 4. Once you've completed the user's task, you must use the attempt_completion tool to present the result of the task to the user. You may also provide a CLI command to showcase the result of your task; this can be particularly useful for web development tasks, where you can run e.g. `open index.html` to show the website you've built.
192 | 5. The user may provide feedback, which you can use to make improvements and try again. But DO NOT continue in pointless back and forth conversations, i.e. don't end your responses with questions or offers for further assistance.
193 | """
194 |
--------------------------------------------------------------------------------
/tools.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Tool Implementations for the Emigo Agent.
6 |
7 | This module defines the concrete Python functions that correspond to the tools
8 | the LLM agent can request (as defined in `system_prompt.py`). These functions
9 | are dispatched by the main `emigo.py` process after receiving a tool request
10 | from the `llm_worker.py` and potentially obtaining user approval via Emacs.
11 |
12 | Each tool function receives the relevant `Session` object (providing access to
13 | session state like the root path and caches) and a dictionary of parameters
14 | extracted from the LLM's request.
15 |
16 | Tools interact with the user's environment primarily by:
17 | - Calling back to Emacs functions via `utils.py` (e.g., for executing commands,
18 | replacing text in buffers, asking questions).
19 | - Interacting with the file system within the session's directory.
20 | - Modifying the session state (e.g., adding files to context, updating caches).
21 |
22 | Each tool function returns a string result formatted for the LLM, indicating
23 | success (often with output) or failure (with an error message).
24 | """
25 |
26 | import os
27 | import sys
28 | import json
29 | import re
30 | import traceback
31 | import difflib
32 | from typing import Dict, List, Tuple, Optional, Any # Add Any
33 |
34 | # Import Session class for type hinting and accessing session state
35 | from session import Session
36 | # Import utilities for calling Emacs and file reading
37 | from utils import get_emacs_func_result, eval_in_emacs, read_file_content
38 | # Import the standard tool-result message/prefix constants from config
39 | from config import (
40 | TOOL_RESULT_SUCCESS, TOOL_RESULT_OUTPUT_PREFIX,
41 | TOOL_DENIED, TOOL_ERROR_PREFIX, TOOL_ERROR_SUFFIX
42 | )
43 |
44 | # --- Helper Functions ---
45 |
def _format_tool_result(result_content: str) -> str:
    """Prefix a tool's output with the standard success marker.

    Args:
        result_content: Text produced by the tool.

    Returns:
        ``TOOL_RESULT_SUCCESS``, a newline, then ``result_content``.
    """
    return "{}\n{}".format(TOOL_RESULT_SUCCESS, result_content)
50 |
def _format_tool_error(error_message: str) -> str:
    """Wrap an error message in the standard error prefix and suffix.

    Args:
        error_message: Human-readable description of what went wrong.

    Returns:
        ``TOOL_ERROR_PREFIX`` + ``error_message`` + ``TOOL_ERROR_SUFFIX``.
    """
    return "{}{}{}".format(TOOL_ERROR_PREFIX, error_message, TOOL_ERROR_SUFFIX)
54 |
55 | def _resolve_path(session_path: str, rel_path: str) -> str:
56 | """Resolves a relative path within the session path."""
57 | return os.path.abspath(os.path.join(session_path, rel_path))
58 |
59 | def _posix_path(path: str) -> str:
60 | """Converts a path to use POSIX separators."""
61 | return path.replace(os.sep, '/')
62 |
63 | # --- Tool Implementations ---
64 |
def execute_command(session: Session, parameters: Dict[str, Any]) -> str:
    """Run a shell command through Emacs and report its output.

    Args:
        session: Active session; its ``session_path`` is passed to Emacs
            together with the command.
        parameters: Tool arguments; ``command`` is required.

    Returns:
        The formatted command output, or a formatted error when ``command``
        is missing or the Emacs call raises.
    """
    cmd = parameters.get("command")
    if not cmd:
        return _format_tool_error("Missing required parameter 'command'")

    try:
        print(f"Executing command: {cmd} in {session.session_path}", file=sys.stderr)
        # Synchronous round-trip: Emacs runs the command and hands back its output.
        emacs_output = get_emacs_func_result("execute-command-sync", session.session_path, cmd)
        payload = f"{TOOL_RESULT_OUTPUT_PREFIX}{emacs_output}"
        return _format_tool_result(payload)
    except Exception as err:
        print(f"Error executing command '{cmd}' via Emacs: {err}", file=sys.stderr)
        return _format_tool_error(f"Error executing command: {err}")
79 |
def read_file(session: Session, parameters: Dict[str, Any]) -> str:
    """Add a file to the chat context and refresh its cache entry.

    Args:
        session: Active session; supplies the root path, context list and cache.
        parameters: Tool arguments; ``path`` (relative to the session root)
            is required.

    Returns:
        A formatted success message, or a formatted error when ``path`` is
        missing, the target is not an existing file, or any step raises.
    """
    rel_path = parameters.get("path")
    if not rel_path:
        return _format_tool_error("Missing required parameter 'path'")

    target = _resolve_path(session.session_path, rel_path)
    display_path = _posix_path(rel_path)

    try:
        if not os.path.isfile(target):
            return _format_tool_error(f"File not found: {display_path}")

        # The session is given the absolute path; it reports whether the
        # file was newly added and a message describing the outcome.
        newly_added, notice = session.add_file_to_context(target)
        if newly_added:
            print(notice, file=sys.stderr)
            eval_in_emacs("message", f"[Emigo] {notice}")  # Notify Emacs
        else:
            # Already in context: force a cache refresh for this path.
            session._update_file_cache(rel_path)

        # The content itself is not returned; only a confirmation is.
        return _format_tool_result(f"File '{display_path}' read and added to context.")
    except Exception as err:
        print(f"Error reading file '{rel_path}': {err}", file=sys.stderr)
        session.invalidate_cache(rel_path)  # Drop any cache entry on failure
        return _format_tool_error(f"Error reading file: {err}")
111 |
def write_to_file(session: Session, parameters: Dict[str, Any]) -> str:
    """Write ``content`` to a file under the session root and refresh the cache.

    Args:
        session: Active session; supplies the root path and the file cache.
        parameters: Tool arguments; ``path`` and ``content`` are required
            (``content`` may be an empty string, but not ``None``).

    Returns:
        A formatted success message, or a formatted error when a required
        parameter is missing or any step raises.
    """
    rel_path = parameters.get("path")
    new_text = parameters.get("content")
    if not rel_path:
        return _format_tool_error("Missing required parameter 'path'")
    if new_text is None:  # An empty string is valid content; only None means "missing"
        return _format_tool_error("Missing required parameter 'content'")

    target = _resolve_path(session.session_path, rel_path)
    display_path = _posix_path(rel_path)

    try:
        # Create any missing parent directories before writing.
        parent_dir = os.path.dirname(target)
        os.makedirs(parent_dir, exist_ok=True)

        with open(target, 'w', encoding='utf-8') as handle:
            handle.write(new_text)
        print(f"Written content to {target}", file=sys.stderr)

        # Tell Emacs the file changed on disk.
        eval_in_emacs("emigo--file-written-externally", target)

        # Seed the session cache with exactly what was written.
        session._update_file_cache(rel_path, content=new_text)

        return _format_tool_result(f"File '{display_path}' written successfully.")

    except Exception as err:
        print(f"Error writing file '{rel_path}': {err}", file=sys.stderr)
        session.invalidate_cache(rel_path)  # Drop any cache entry on failure
        return _format_tool_error(f"Error writing file: {err}")
145 |
146 | def _parse_search_replace_blocks(diff_str: str) -> Tuple[List[Tuple[str, str]], Optional[str]]:
147 | """Parses *all* SEARCH/REPLACE blocks from a diff string.
148 |
149 | Args:
150 | diff_str: The string containing one or more SEARCH/REPLACE blocks.
151 |
152 | Returns:
153 | A tuple containing:
154 | - A list of (search_text, replace_text) tuples for each valid block found.
155 | - An error message string if parsing fails, otherwise None.
156 | """
157 | search_marker = "<<<<<<< SEARCH\n"
158 | divider_marker = "\n=======\n"
159 | replace_marker = "\n>>>>>>> REPLACE"
160 | blocks = []
161 | # Use regex to find all blocks non-greedily
162 | pattern = re.compile(
163 | re.escape(search_marker) +
164 | '(.*?)' + # Capture search text (non-greedy)
165 | re.escape(divider_marker) +
166 | '(.*?)' + # Capture replace text (non-greedy)
167 | re.escape(replace_marker),
168 | re.DOTALL # Allow '.' to match newlines
169 | )
170 |
171 | found_blocks_raw = pattern.findall(diff_str)
172 |
173 | if not found_blocks_raw:
174 | # Check for common markdown fence if no blocks found
175 | if "```" in diff_str and search_marker not in diff_str:
176 | return [], "Diff content seems to be a markdown code block, not a SEARCH/REPLACE block."
177 | return [], "No valid SEARCH/REPLACE blocks found in the provided diff."
178 |
179 | for search_text, replace_text in found_blocks_raw:
180 | # Basic validation: ensure markers are not nested within text itself in unexpected ways
181 | # This check is basic and might not catch all complex nesting scenarios.
182 | if search_marker in search_text or divider_marker in search_text or replace_marker in search_text or \
183 | search_marker in replace_text or divider_marker in replace_text or replace_marker in replace_text:
184 | return [], f"Detected malformed or nested SEARCH/REPLACE markers within a block's content:\nSearch:\n{search_text}\nReplace:\n{replace_text}"
185 |
186 | # Optional: Remove trailing newline from replace_text if needed,
187 | # but generally keep content as-is from the LLM.
188 | # if replace_text.endswith('\n'):
189 | # replace_text = replace_text[:-1]
190 |
191 | blocks.append((search_text, replace_text))
192 |
193 | return blocks, None
194 |
195 | def _get_line_number(text: str, char_index: int) -> int:
196 | """Calculates the 1-based line number for a given character index."""
197 | return text.count('\n', 0, char_index) + 1
198 |
def _stripped_line_similarity(line1: str, line2: str) -> float:
    """Return the difflib similarity ratio of two lines, ignoring surrounding whitespace."""
    stripped1 = line1.strip()
    stripped2 = line2.strip()
    if not stripped1 and not stripped2:  # Both are whitespace/empty
        return 1.0
    if not stripped1 or not stripped2:  # Exactly one is whitespace/empty
        return 0.0
    return difflib.SequenceMatcher(None, stripped1, stripped2).ratio()


def _find_sequential_match(search_lines: List[str], file_lines: List[str],
                           consumed: set, threshold: float) -> Optional[int]:
    """Return the first 0-based file index where all search lines match consecutively, else None."""
    span = len(search_lines)
    # Starts closer than `span` lines to the end can never hold the block.
    for start in range(len(file_lines) - span + 1):
        window = range(start, start + span)
        # Lines claimed by an earlier block may not be reused.
        if any(index in consumed for index in window):
            continue
        if all(
            _stripped_line_similarity(search_line, file_lines[index]) >= threshold
            for search_line, index in zip(search_lines, window)
        ):
            return start
    return None


def replace_in_file(session: Session, parameters: Dict[str, str]) -> str:
    """Replace content in a file using SEARCH/REPLACE blocks applied via Emacs.

    Each SEARCH block is located in the cached file content by comparing
    whitespace-stripped lines with a similarity threshold; the first
    position where every search line matches consecutively wins, and file
    lines claimed by one block are not reused by later blocks.  If every
    block is located, the line ranges and replacement texts are sent to
    Emacs in a single call and the session cache is refreshed afterwards.
    If any block fails to match, nothing is sent to Emacs.

    Args:
        session: Active session; supplies the root path and the file cache.
        parameters: Tool arguments; ``path`` (relative to the session root)
            and ``diff`` (one or more SEARCH/REPLACE blocks) are required.

    Returns:
        A formatted success message, or a formatted error describing the
        missing parameter, parse failure, unmatched block(s) or Emacs error.
    """
    rel_path = parameters.get("path")
    diff_str = parameters.get("diff")
    similarity_threshold = 0.85  # Minimum per-line similarity ratio (85%)

    # Validate up front: building the path from a missing `path` would raise
    # outside the try block below.
    if not rel_path:
        return _format_tool_error("Missing required parameter 'path'")
    if diff_str is None:
        return _format_tool_error("Missing required parameter 'diff'")

    abs_path = _resolve_path(session.session_path, rel_path)
    posix_rel_path = _posix_path(rel_path)

    try:
        if not os.path.isfile(abs_path):
            return _format_tool_error(f"File not found: {rel_path}. Please ensure it's added to the chat first.")

        # --- Get File Content ---
        file_content = session.get_cached_content(rel_path)
        if file_content is None:
            return _format_tool_error(f"Could not get content for file: {posix_rel_path}. It might not exist or be readable.")

        # Safeguard against an error string having been stored in the cache.
        if file_content.startswith("# Error"):
            return _format_tool_error(f"Cannot perform replacement. Cached content indicates a previous error for: {posix_rel_path}. Please use read_file again.")

        # --- Parse *All* Diff Blocks ---
        parsed_blocks, parse_error = _parse_search_replace_blocks(diff_str)
        # Diagnostics go to stderr like every other log line in this module.
        print(f"Parsed {len(parsed_blocks)} SEARCH/REPLACE block(s); parse error: {parse_error}", file=sys.stderr)
        if parse_error:
            return _format_tool_error(parse_error)
        if not parsed_blocks:
            return _format_tool_error("No valid SEARCH/REPLACE blocks found in the diff.")

        # --- Sequential Line-by-Line Matching ---
        file_lines = file_content.splitlines(keepends=True)
        replacements_to_apply = []  # (start_line, elisp_end_line, replace_text)
        errors = []
        already_matched_file_line_indices = set()  # File lines claimed by earlier blocks

        for block_index, (search_text, replace_text) in enumerate(parsed_blocks):
            search_lines = search_text.splitlines(keepends=True)
            if not search_lines or not search_text.strip():
                errors.append(f"Block {block_index+1}: SEARCH block is empty or contains only whitespace.")
                continue

            match_start = _find_sequential_match(
                search_lines, file_lines, already_matched_file_line_indices, similarity_threshold
            )
            if match_start is None:
                errors.append(
                    f"Block {block_index+1}: Could not find a sequential match for the SEARCH text in '{posix_rel_path}'.\n"
                    f"SEARCH block start:\n```\n{''.join(search_lines[:5])}{'...' if len(search_lines) > 5 else ''}\n```"
                )
                continue

            start_line_num = match_start + 1  # 1-based line number
            end_line_num_inclusive = match_start + len(search_lines)
            # Elisp receives the line number *after* the last matched line.
            elisp_end_line_num = end_line_num_inclusive + 1

            replacements_to_apply.append((start_line_num, elisp_end_line_num, replace_text))
            already_matched_file_line_indices.update(range(match_start, match_start + len(search_lines)))

            print(f"Block {block_index+1}: Found sequential match for lines {start_line_num}-{end_line_num_inclusive} (Elisp end: {elisp_end_line_num}) in '{posix_rel_path}'", file=sys.stderr)

        # --- Handle Errors or Proceed ---
        if errors:
            error_header = f"Failed to apply replacements to '{posix_rel_path}' due to {len(errors)} error(s):\n"
            error_details = "\n\n".join(errors)
            error_footer = "\nPlease use read_file to get the exact current content and try again with updated SEARCH blocks."
            return _format_tool_error(error_header + error_details + error_footer)

        if not replacements_to_apply:
            return _format_tool_error("No replacements could be applied (all blocks failed matching or were empty).")

        # --- Call Elisp to Perform Multiple Replacements ---
        try:
            # The replacement list is passed to Elisp as a JSON array string.
            replacements_json = json.dumps(replacements_to_apply)
            print(f"Requesting {len(replacements_to_apply)} replacements in '{posix_rel_path}' via Elisp.", file=sys.stderr)

            result = get_emacs_func_result("replace-regions-sync", abs_path, replacements_json)

            if result is True or str(result).lower() == 't':  # Elisp `t` means success
                print(f"Elisp successfully applied {len(replacements_to_apply)} replacements to '{rel_path}'.", file=sys.stderr)
                # _format_tool_result already prepends the success marker, so
                # the message itself must not repeat it.
                success_message = f"File '{posix_rel_path}' modified successfully by applying {len(replacements_to_apply)} block(s)."
                try:
                    updated_content = read_file_content(abs_path)
                    session._update_file_cache(rel_path, content=updated_content)
                    print(f"Updated session cache for '{rel_path}' after successful replacement.", file=sys.stderr)
                except Exception as read_err:
                    print(f"Warning: Failed to re-read file '{rel_path}' after replacement to update cache: {read_err}", file=sys.stderr)
                    session.invalidate_cache(rel_path)
                    # The edit itself succeeded; report that, plus the cache issue.
                    return _format_tool_result(f"{success_message}\n(Warning: Could not update session cache after modification.)")

                return _format_tool_result(success_message)

            # Anything other than `t` is treated as an error description.
            error_detail = str(result) if result else "Unknown error during multi-replacement in Emacs."
            print(f"Error applying multi-replacement via Elisp to '{rel_path}': {error_detail}", file=sys.stderr)
            return _format_tool_error(
                f"Error applying replacements in Emacs: {error_detail}\n\n"
                f"File: {posix_rel_path}\n"
                f"Please check the Emacs *Messages* buffer for details."
            )
        except Exception as elisp_call_err:
            print(f"Error calling Elisp function 'replace-regions-sync' for '{rel_path}': {elisp_call_err}\n{traceback.format_exc()}", file=sys.stderr)
            return _format_tool_error(f"Error communicating with Emacs for replacement: {elisp_call_err}")

    except Exception as e:
        print(f"Error during replace_in_file for '{rel_path}': {e}\n{traceback.format_exc()}", file=sys.stderr)
        return _format_tool_error(f"Error processing replacement for {posix_rel_path}: {e}")
373 |
374 |
def ask_followup_question(session: Session, parameters: Dict[str, Any]) -> str:
    """Put a question to the user through Emacs and return their answer.

    Args:
        session: Active session; its ``session_path`` is passed to Emacs.
        parameters: Tool arguments; ``question`` is required, ``options`` is
            an optional list of 2-5 strings.  Any other ``options`` value is
            logged and replaced by an empty list.

    Returns:
        The formatted answer, ``TOOL_DENIED`` when Emacs returns ``None`` or
        an empty string, or a formatted error when ``question`` is missing
        or the Emacs call raises.
    """
    question = parameters.get("question")
    raw_options = parameters.get("options")

    if not question:
        return _format_tool_error("Missing required parameter 'question'")

    try:
        # Options travel to Elisp as a JSON array string; default is empty.
        encoded_options = "[]"
        is_string_list = isinstance(raw_options, list) and all(
            isinstance(item, str) for item in raw_options
        )
        if is_string_list and 2 <= len(raw_options) <= 5:
            encoded_options = json.dumps(raw_options)
        elif is_string_list:
            print(f"Warning: Received {len(raw_options)} options, expected 2-5. Sending empty options.", file=sys.stderr)
        elif raw_options is not None:
            print(f"Warning: Invalid format for options, expected list of strings: {raw_options}. Sending empty options.", file=sys.stderr)

        # Synchronous call: returns once Emacs hands back an answer.
        reply = get_emacs_func_result("ask-user-sync", session.session_path, question, encoded_options)

        if reply is None or reply == "":
            # nil / empty string from Emacs is treated as a cancellation.
            print("User cancelled or provided no answer to followup question.", file=sys.stderr)
            return TOOL_DENIED
        return _format_tool_result(f"\n{reply}\n")
    except Exception as err:
        print(f"Error asking followup question via Emacs: {err}", file=sys.stderr)
        return _format_tool_error(f"Error asking question: {err}")
409 |
def attempt_completion(session: Session, parameters: Dict[str, Any]) -> str:
    """Tell Emacs that the task is complete.

    Args:
        session: Active session; its ``session_path`` is passed to Emacs.
        parameters: Tool arguments; ``result`` is required, ``command`` is an
            optional command string (sent as ``""`` when absent or empty).

    Returns:
        The marker string ``"COMPLETION_SIGNALLED"`` on success, or a
        formatted error when ``result`` is missing or the Emacs call raises.
    """
    summary = parameters.get("result")
    demo_command = parameters.get("command")

    if summary is None:
        return _format_tool_error("Missing required parameter 'result'")

    try:
        eval_in_emacs(
            "emigo--signal-completion",
            session.session_path,
            summary,
            demo_command or "",
        )
    except Exception as err:
        print(f"Error signalling completion to Emacs: {err}", file=sys.stderr)
        return _format_tool_error(f"Error signalling completion: {err}")
    # Not a normal tool result: a sentinel string for the caller to check.
    return "COMPLETION_SIGNALLED"
427 |
def list_repomap(session: Session, parameters: Dict[str, Any]) -> str:
    """Generate the repository map and store it in the session.

    The optional ``path`` is validated as a directory and echoed in the
    result message, but the map itself is currently generated from the
    session's chat files only (``generate_map`` is not given the path).

    Args:
        session: Active session; supplies the root path, chat files, the
            repo mapper and the slot where the last map is stored.
        parameters: Tool arguments; ``path`` is optional and defaults to
            ``"."`` (also when given as ``None`` or an empty string).

    Returns:
        A formatted success message, or a formatted error when the path is
        not a directory or map generation raises.
    """
    # `or "."` also covers an explicit null/empty path, which would otherwise
    # make the path join below raise outside the try block.
    rel_path = parameters.get("path") or "."
    abs_path = _resolve_path(session.session_path, rel_path)
    posix_rel_path = _posix_path(rel_path)

    try:
        if not os.path.isdir(abs_path):
            return _format_tool_error(f"Path is not a valid directory: {posix_rel_path}")

        chat_files = session.get_chat_files()
        print(f"Generating repomap for {session.session_path}, focusing on '{posix_rel_path}' with chat files: {chat_files}", file=sys.stderr)

        # TODO: pass the requested path to the mapper once it supports
        # focusing; for now the map may be broader than the requested path.
        repo_map_content = session.repo_mapper.generate_map(chat_files=chat_files)

        if not repo_map_content:
            repo_map_content = "(No map content generated)"

        # Keep the generated map in the session.
        session.set_last_repomap(repo_map_content)

        return _format_tool_result(f"Repository map generated, focusing analysis around '{posix_rel_path}'.")

    except Exception as e:
        print(f"Error generating repomap for path '{posix_rel_path}': {e}\n{traceback.format_exc()}", file=sys.stderr)
        session.set_last_repomap(None)  # Clear stored map on error
        return _format_tool_error(f"Error generating repository map for '{posix_rel_path}': {e}")
463 |
def list_files(session: Session, parameters: Dict[str, Any]) -> str:
    """List the files of a directory by asking Emacs.

    Args:
        session: Active session; supplies the root path.
        parameters: Tool arguments; ``path`` is optional and defaults to
            ``"."`` (also when given as ``None`` or an empty string).
            ``recursive`` is optional; non-boolean values count as true only
            when their string form is ``"true"`` (case-insensitive).

    Returns:
        A formatted listing, or a formatted error when the Emacs call raises.
    """
    # `or "."` also covers an explicit null/empty path, which would otherwise
    # make the path join below raise outside the try block.
    rel_path = parameters.get("path") or "."
    recursive = parameters.get("recursive", False)

    # Normalize to a real bool.
    if not isinstance(recursive, bool):
        recursive = str(recursive).lower() == "true"

    abs_path = _resolve_path(session.session_path, rel_path)
    posix_rel_path = _posix_path(rel_path)
    try:
        files_str = get_emacs_func_result("list-files-sync", abs_path, recursive)

        mode = 'recursive' if recursive else 'non-recursive'
        return _format_tool_result(
            f"Files in '{posix_rel_path}' ({mode}):\n{files_str}"
        )
    except Exception as e:
        print(f"Error listing files via Emacs: {e}", file=sys.stderr)
        return _format_tool_error(f"Error listing files: {e}")
486 |
def search_files(session: Session, parameters: Dict[str, Any]) -> str:
    """Search for a pattern under a path by asking Emacs.

    Args:
        session: Active session; supplies the root path.
        parameters: Tool arguments:
            ``pattern`` (required) - the search pattern, passed to Emacs as-is;
            ``path`` (optional) - directory to search, default ``"."`` (also
            when ``None``/empty); a file path searches its parent directory;
            ``case_sensitive`` (optional, default ``False``) - non-boolean
            values count as true only when their string form is ``"true"``;
            ``max_matches`` (optional, default 50) - capped at 200; values
            that are non-numeric or not positive fall back to 50.

    Returns:
        Formatted search results, a formatted "no matches" message, or a
        formatted error when ``pattern`` is missing, the path does not
        exist, or the Emacs call raises.
    """
    # `or "."` also covers an explicit null/empty path, which would otherwise
    # make the path join below raise outside the try block.
    rel_path = parameters.get("path") or "."
    pattern = parameters.get("pattern")
    case_sensitive = parameters.get("case_sensitive", False)
    max_matches_arg = parameters.get("max_matches", 50)

    if not pattern:
        return _format_tool_error("Missing required parameter 'pattern'")

    # Sanitize max_matches: cap at 200, fall back to 50 when unusable.
    try:
        max_matches = min(200, int(max_matches_arg))
        if max_matches <= 0:
            max_matches = 50
    except (ValueError, TypeError):
        max_matches = 50

    # Normalize to a real bool.
    if not isinstance(case_sensitive, bool):
        case_sensitive = str(case_sensitive).lower() == "true"

    abs_path = _resolve_path(session.session_path, rel_path)
    posix_rel_path = _posix_path(rel_path)
    search_scope_path = abs_path
    search_scope_desc = posix_rel_path

    try:
        if os.path.isfile(abs_path):
            # A file was given: widen the scope to its containing directory.
            search_scope_path = os.path.dirname(abs_path)
            search_scope_desc = _posix_path(os.path.relpath(search_scope_path, session.session_path))
            print(f"Note: '{posix_rel_path}' is a file. Searching its directory: '{search_scope_desc}'", file=sys.stderr)
        elif not os.path.isdir(search_scope_path):
            return _format_tool_error(f"Path not found or is not a directory/file: {posix_rel_path}")

        search_results = get_emacs_func_result(
            "search-files-sync", search_scope_path, pattern, case_sensitive, max_matches
        )

        if not search_results or search_results.strip() == "":
            return _format_tool_result(f"No matches found for pattern: {pattern} in '{search_scope_desc}'")

        result = f"Found matches for pattern '{pattern}' in '{search_scope_desc}':\n{search_results}"
        return _format_tool_result(result)

    except Exception as e:
        print(f"Error searching files via Emacs: {e}\n{traceback.format_exc()}", file=sys.stderr)
        return _format_tool_error(f"Error searching files: {e}")
539 |
--------------------------------------------------------------------------------
/emigo-epc.el:
--------------------------------------------------------------------------------
1 | ;;; epcs.el --- EPC Server -*- lexical-binding: t -*-
2 |
3 | ;; Copyright (C) 2011,2012,2013 Masashi Sakurai
4 |
5 | ;; Author: Masashi Sakurai
6 | ;; Keywords: lisp
7 |
8 | ;; This program is free software; you can redistribute it and/or modify
9 | ;; it under the terms of the GNU General Public License as published by
10 | ;; the Free Software Foundation, either version 3 of the License, or
11 | ;; (at your option) any later version.
12 |
13 | ;; This program is distributed in the hope that it will be useful,
14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | ;; GNU General Public License for more details.
17 |
18 | ;; You should have received a copy of the GNU General Public License
19 | ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | ;;; Commentary:
22 |
23 | ;;
24 |
25 | ;;; Code:
26 |
27 | (require 'cl-lib)
28 | (require 'subr-x)
29 |
30 | ;; deferred
(cl-defmacro emigo-deferred-chain (&rest elements)
  "Evaluate ELEMENTS in order with `it' bound to the previous value.
`it' starts as nil and is set to the value of each form in turn, so
every form may refer to the result of the one before it.  The value
of the last form is returned (nil when ELEMENTS is empty)."
  (declare (debug (&rest form))
           (indent 0))
  `(let (it)
     ,@(mapcar (lambda (form) `(setq it ,form)) elements)
     it))
40 |
41 | ;; Debug
(defvar emigo-deferred-debug nil
  "When non-nil, `emigo-deferred-log' writes debug output.")

(defvar emigo-deferred-debug-count 0
  "[internal] Number of entries written so far by `emigo-deferred-log'.")

(defun emigo-deferred-log (&rest args)
  "[internal] Append a debug entry to the buffer *emigo-deferred-log*.
ARGS are passed to `format' to build the message.  Does nothing
unless `emigo-deferred-debug' is non-nil.  Each entry is prefixed
with `emigo-deferred-debug-count', which is then incremented."
  (when emigo-deferred-debug
    (with-current-buffer (get-buffer-create "*emigo-deferred-log*")
      (save-excursion
        (goto-char (point-max))
        (let ((message-text (apply #'format args)))
          (insert (format "%5i %s\n\n\n"
                          emigo-deferred-debug-count
                          message-text)))))
    (setq emigo-deferred-debug-count (1+ emigo-deferred-debug-count))))
56 |
(defvar emigo-deferred-debug-on-signal nil
  "If non-nil, this value is used as `debug-on-signal' inside the
`condition-case' forms of the deferred implementation whenever
`debug-on-signal' itself is nil (see
`emigo-deferred-condition-case').  The Emacs debugger can then
catch an error that occurred in an asynchronous task.")
62 |
(cl-defmacro emigo-deferred-condition-case (var protected-form &rest handlers)
  "[internal] `condition-case' variant honoring the debug switch.
VAR, PROTECTED-FORM and HANDLERS have the same meaning as in
`condition-case'.  While the form runs, `debug-on-signal' is bound
to itself or, when that is nil, to `emigo-deferred-debug-on-signal'."
  (declare (debug condition-case)
           (indent 1))
  (let ((guarded (append (list 'condition-case var protected-form)
                         handlers)))
    `(let ((debug-on-signal
            (or debug-on-signal emigo-deferred-debug-on-signal)))
       ,guarded)))
73 |
74 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
75 | ;; Back end functions of deferred tasks
76 |
(defvar emigo-deferred-tick-time 0.001
  "Waiting time between asynchronous tasks (second).
This is the delay passed to `run-at-time' when a task is queued by
`emigo-deferred-post-task'.  A shorter waiting time increases the
load of Emacs.  The end user can tune this parameter.  However,
applications should not modify it because the applications run on
various environments.")

(defvar emigo-deferred-queue nil
  "[internal] The execution queue of deferred objects.
New entries are pushed at the front and consumed from the end, so
tasks run in the order they were posted.
See the functions `emigo-deferred-post-task' and `emigo-deferred-worker'.")
86 |
(defun emigo-deferred-post-task (d which &optional arg)
  "[internal] Queue the deferred object D for asynchronous execution.
The entry (D WHICH . ARG) is pushed onto `emigo-deferred-queue' and
a one-shot timer is armed to run `emigo-deferred-worker' after
`emigo-deferred-tick-time' seconds.
WHICH is a symbol, `ok' or `ng'.  ARG is the value handed to the
task when it runs.  Return D."
  (let ((task (cons d (cons which arg))))
    (setq emigo-deferred-queue (cons task emigo-deferred-queue))
    (emigo-deferred-log "QUEUE-POST [%s]: %s"
                        (length emigo-deferred-queue) task)
    (run-at-time emigo-deferred-tick-time nil 'emigo-deferred-worker))
  d)
97 |
(defun emigo-deferred-worker ()
  "[internal] Run the oldest task waiting in `emigo-deferred-queue'.
Normally invoked from a timer.  The last queue entry is removed and
executed with `emigo-deferred-exec-task'.  An error raised by the
task is logged and reported with `message' instead of propagating.
Return the task's value, or nil when the queue was empty or the
task failed."
  (unless (null emigo-deferred-queue)
    (let* ((task (car (last emigo-deferred-queue)))
           (deferred (nth 0 task))
           (kind (nth 1 task))
           (payload (nthcdr 2 task))
           (outcome nil))
      (setq emigo-deferred-queue (nbutlast emigo-deferred-queue))
      (condition-case failure
          (setq outcome (emigo-deferred-exec-task deferred kind payload))
        (error
         (emigo-deferred-log "ERROR : %s" failure)
         (message "deferred error : %s" failure)))
      outcome)))
113 |
;; Struct: emigo-deferred-object
;;
;; One link of a deferred chain.
;;
;; callback    : function called with the value when run as `ok'
;;               (default `identity')
;; errorback   : function called with the error when run as `ng'
;;               (default `emigo-deferred-resignal')
;; cancel      : a canceling function (default `emigo-deferred-default-cancel')
;; next        : the next chained deferred object (default nil)
;; status      : if 'ok or 'ng, this deferred has a result (error) value
;;               that was not yet handed to a next object (default nil)
;; value       : saved value (default nil)
;;
(cl-defstruct emigo-deferred-object
  (callback 'identity)
  (errorback 'emigo-deferred-resignal)
  (cancel 'emigo-deferred-default-cancel)
  next status value)
128 |
(defun emigo-deferred-resignal (err)
  "[internal] Raise ERR again as an Emacs error.

When ERR is a list whose car is a symbol carrying an
`error-conditions' property, that condition is signaled with the
cdr of ERR as its data.  A string is raised through `error' as a
plain message.  Anything else is printed with the `%S' format and
raised through `error'."
  (let ((condition (and (listp err) (car err))))
    (if (and (listp err)
             (symbolp condition)
             (get condition 'error-conditions))
        (signal condition (cdr err))
      (error (if (stringp err) "%s" "%S") err))))
145 |
(defun emigo-deferred-default-cancel (d)
  "[internal] Default canceling function.
Reset the callback and errorback of D to their defaults and detach
its next object.  Return D."
  (emigo-deferred-log "CANCEL : %s" d)
  (setf (emigo-deferred-object-callback d) 'identity
        (emigo-deferred-object-errorback d) 'emigo-deferred-resignal
        (emigo-deferred-object-next d) nil)
  d)
153 |
(defun emigo-deferred-exec-task (d which &optional arg)
  "[internal] Executing deferred task.  If the deferred object has
next deferred task or the return value is a deferred object, this
function adds the task to the execution queue.
D is a deferred object.  WHICH is a symbol, `ok' or `ng'.  ARG is
an argument value for execution of the deferred task.

When D has no next object, the outcome is stored in D's `status'
and `value' slots and returned.  Signals an error if D is nil."
  (emigo-deferred-log "EXEC : %s / %s / %s" d which arg)
  (when (null d) (error "emigo-deferred-exec-task was given a nil."))
  ;; `ok' selects the callback; any other WHICH selects the errorback.
  (let ((callback (if (eq which 'ok)
                      (emigo-deferred-object-callback d)
                    (emigo-deferred-object-errorback d)))
        (next-deferred (emigo-deferred-object-next d)))
    (cond
     (callback
      (emigo-deferred-condition-case err
        (let ((value (funcall callback arg)))
          (cond
           ;; The handler returned another deferred object: the rest of
           ;; the chain has to wait for that one, so hang NEXT-DEFERRED
           ;; behind it.
           ((emigo-deferred-object-p value)
            (emigo-deferred-log "WAIT NEST : %s" value)
            (if next-deferred
                (emigo-deferred-set-next value next-deferred)
              value))
           (t
            ;; Plain value: pass it on asynchronously, or remember it on
            ;; D when nothing is chained yet.
            (if next-deferred
                (emigo-deferred-post-task next-deferred 'ok value)
              (setf (emigo-deferred-object-status d) 'ok)
              (setf (emigo-deferred-object-value d) value)
              value))))
        (error
         ;; The handler signaled: forward the error object as `ng', or
         ;; record it on D when this is the end of the chain.
         (cond
          (next-deferred
           (emigo-deferred-post-task next-deferred 'ng err))
          (t
           (emigo-deferred-log "ERROR : %S" err)
           (message "deferred error : %S" err)
           (setf (emigo-deferred-object-status d) 'ng)
           (setf (emigo-deferred-object-value d) err)
           err)))))
     (t                                 ; <= (null callback)
      ;; No handler for this direction: pass ARG through unchanged,
      ;; synchronously.
      (cond
       (next-deferred
        (emigo-deferred-exec-task next-deferred which arg))
       ((eq which 'ok) arg)
       (t                               ; (eq which 'ng)
        (emigo-deferred-resignal arg)))))))
199 |
(defun emigo-deferred-set-next (prev next)
  "[internal] Chain the deferred object NEXT after PREV.
When PREV already holds a result (its status is `ok' or `ng'), the
status is cleared and NEXT is executed immediately with the saved
value; if that execution yields a deferred object, it is returned.
In every other case NEXT is returned."
  (setf (emigo-deferred-object-next prev) next)
  (let ((status (emigo-deferred-object-status prev)))
    (if (not (memq status '(ok ng)))
        next
      (setf (emigo-deferred-object-status prev) nil)
      (let ((ret (emigo-deferred-exec-task
                  next status (emigo-deferred-object-value prev))))
        (if (emigo-deferred-object-p ret)
            ret
          next)))))
217 |
(defun emigo-deferred-new (&optional callback)
  "Create and return a deferred object.
When CALLBACK is non-nil it replaces the default callback."
  (let ((d (make-emigo-deferred-object)))
    (when callback
      (setf (emigo-deferred-object-callback d) callback))
    d))
223 |
(defun emigo-deferred-callback (d &optional arg)
  "Run the deferred chain starting at D on the `ok' path.
ARG is the value handed to D's callback.  The task is executed
immediately, not queued."
  (let ((direction 'ok))
    (emigo-deferred-exec-task d direction arg)))
227 |
(defun emigo-deferred-errorback (d &optional arg)
  "Run the deferred chain starting at D on the `ng' path.
ARG is the error value handed to D's errorback.  The task is
executed immediately, not queued."
  (declare (indent 1))
  (let ((direction 'ng))
    (emigo-deferred-exec-task d direction arg)))
232 |
(defun emigo-deferred-callback-post (d &optional arg)
  "Queue D to run on the `ok' path with ARG.
Unlike `emigo-deferred-callback' the task is not executed now; it
is posted to the execution queue."
  (declare (indent 1))
  (let ((direction 'ok))
    (emigo-deferred-post-task d direction arg)))
237 |
(defun emigo-deferred-next (&optional callback arg)
  "Create a deferred object for CALLBACK and schedule it with ARG.
This is a shortcut for
  (emigo-deferred-callback-post (emigo-deferred-new callback) arg).
Return the new deferred object."
  (let ((d (emigo-deferred-new callback)))
    (emigo-deferred-callback-post d arg)
    d))
247 |
(defun emigo-deferred-nextc (d callback)
  "Chain a new deferred object whose OK handler is CALLBACK after D.
Return the value of `emigo-deferred-set-next'."
  (declare (indent 1))
  (emigo-deferred-set-next
   d
   (make-emigo-deferred-object :callback callback)))
253 |
(defun emigo-deferred-error (d callback)
  "Chain a new deferred object whose error handler is CALLBACK after D.
Return the value of `emigo-deferred-set-next'."
  (declare (indent 1))
  (emigo-deferred-set-next
   d
   (make-emigo-deferred-object :errorback callback)))
259 |
(defvar emigo-epc-debug nil
  "Debug output switch for the EPC layer.
When non-nil, `emigo-epc-log' appends its messages to the buffer
\"*emigo-epc-log*\".")
261 |
(defun emigo-epc-log (&rest args)
  "Append a debug entry built from ARGS to \"*emigo-epc-log*\".
ARGS are passed to `format'.  Nothing happens unless
`emigo-epc-debug' is non-nil."
  (when emigo-epc-debug
    (let ((log-buffer (get-buffer-create "*emigo-epc-log*")))
      (with-current-buffer log-buffer
        (buffer-disable-undo)
        (goto-char (point-max))
        (insert (concat (apply #'format args) "\n\n\n"))))))
268 |
(defun emigo-epc-make-procbuf (name)
  "[internal] Return an emptied buffer called NAME for process I/O.
The buffer is created when missing.  Its local
`kill-buffer-query-functions' is set to nil, its contents are
erased and undo recording is turned off."
  (let ((procbuf (get-buffer-create name)))
    (with-current-buffer procbuf
      (setq-local kill-buffer-query-functions nil)
      (erase-buffer)
      (buffer-disable-undo))
    procbuf))
276 |
(defvar emigo-epc-uid 1
  "[internal] Counter backing the function `emigo-epc-uid'.")

(defun emigo-epc-uid ()
  "Increment the counter `emigo-epc-uid' and return its new value."
  (setq emigo-epc-uid (1+ emigo-epc-uid)))
281 |
(defvar emigo-epc-accept-process-timeout 150
  "Asynchronous timeout time. (msec)
Passed as the millisecond argument of `accept-process-output'
while waiting for the peer.")

;; Declare `epc-error' as an error condition so that it can be raised
;; with `signal' and caught by `condition-case' handlers for `error'.
(put 'epc-error 'error-conditions '(error epc-error))
(put 'epc-error 'error-message "EPC Error")
287 |
(cl-defstruct emigo-epc-connection
  "Set of information for network connection and event handling.

name    : Connection name. This name is used for process and buffer names.
process : Connection process object.
buffer  : Working buffer for the incoming data.
channel : Event channel for incoming messages.  It is created as
          (list NAME nil); observers registered with
          `emigo-epc-signal-connect' are stored after these two
          elements."
  name process buffer channel)
296 |
(defun emigo-epc-connect (host port)
  "[internal] Open a network stream to HOST:PORT.
Install the EPC filter and sentinel on the new process, switch it
to binary coding, clear its query-on-exit flag and return the
resulting `emigo-epc-connection' object."
  (emigo-epc-log ">> Connection start: %s:%s" host port)
  (let* ((conn-name (format "emigo-epc con %s" (emigo-epc-uid)))
         (conn-buffer (emigo-epc-make-procbuf (format "*%s*" conn-name)))
         (conn-process (open-network-stream conn-name conn-buffer host port))
         (conn (make-emigo-epc-connection
                :name conn-name
                :process conn-process
                :buffer conn-buffer
                :channel (list conn-name nil))))
    (emigo-epc-log ">> Connection establish")
    (set-process-coding-system conn-process 'binary 'binary)
    (set-process-filter
     conn-process
     (lambda (proc chunk) (emigo-epc-process-filter conn proc chunk)))
    (set-process-sentinel
     conn-process
     (lambda (proc event) (emigo-epc-process-sentinel conn proc event)))
    (set-process-query-on-exit-flag conn-process nil)
    conn))
322 |
(defun emigo-epc-process-sentinel (connection process msg)
  "Sentinel for the process of CONNECTION.
Log the status change MSG of PROCESS, then disconnect CONNECTION."
  (let ((conn-name (emigo-epc-connection-name connection)))
    (emigo-epc-log "!! Process Sentinel [%s] : %S : %S" conn-name process msg))
  (emigo-epc-disconnect connection))
327 |
(defun emigo-epc-net-send (connection sexp)
  "Send SEXP to the peer of CONNECTION.
SEXP is printed with `emigo-epc-prin1-to-string', terminated with
a newline and encoded as `utf-8-unix'.  The encoded payload is
prefixed with its length written as six hexadecimal digits."
  (let* ((printed (emigo-epc-prin1-to-string sexp))
         (payload (encode-coding-string (concat printed "\n") 'utf-8-unix))
         (header (format "%06x" (length payload)))
         (packet (concat header payload)))
    (emigo-epc-log ">> SEND : [%S]" packet)
    (process-send-string (emigo-epc-connection-process connection) packet)))
335 |
(defun emigo-epc-disconnect (connection)
  "Tear down CONNECTION.
When CONNECTION has a process, its sentinel is removed, the
process is deleted and the connection buffer is killed if it
still exists."
  (let ((conn-name (emigo-epc-connection-name connection))
        (conn-process (emigo-epc-connection-process connection))
        (conn-buffer (emigo-epc-connection-buffer connection)))
    (emigo-epc-log "!! Disconnect [%s]" conn-name)
    (unless (null conn-process)
      (set-process-sentinel conn-process nil)
      (delete-process conn-process)
      (if (get-buffer conn-buffer)
          (kill-buffer conn-buffer)))
    (emigo-epc-log "!! Disconnected finished [%s]" conn-name)))
346 |
(defun emigo-epc-process-filter (connection process message)
  "Process filter for CONNECTION.
Append the received chunk MESSAGE to the end of the connection
buffer, then dispatch whatever complete messages are available."
  (let ((conn-name (emigo-epc-connection-name connection)))
    (emigo-epc-log "INCOMING: [%s] [%S]" conn-name message))
  (with-current-buffer (emigo-epc-connection-buffer connection)
    (goto-char (point-max))
    (insert message)
    (emigo-epc-process-available-input connection process)))
353 |
(defun emigo-epc-signal-connect (channel event-sym &optional callback)
  "Register an observer for EVENT-SYM on CHANNEL; return its deferred.
An EVENT-SYM of t makes the observer receive every signal of the
channel.  When CALLBACK is given, the returned deferred object
runs it for each matching signal.  Further tasks can be chained to
the returned deferred object."
  (let* ((observer (emigo-deferred-new callback))
         (entry (cons event-sym observer)))
    (setf (cddr channel) (cons entry (cddr channel)))
    observer))
366 |
(defun emigo-epc-signal-send (channel event-sym &rest args)
  "Post the signal EVENT-SYM with ARGS to the observers of CHANNEL.
Every observer registered for EVENT-SYM, or for t, is queued with
the event object (EVENT-SYM ARGS).  An observer can take it apart
like this:

  (lambda (event)
    (destructuring-bind
        (event-sym (args))
        event ... ))
"
  (let ((event (list event-sym args)))
    (dolist (observer (cddr channel))
      (let ((name (car observer)))
        (when (or (eq event-sym name) (eq t name))
          (emigo-deferred-callback-post (cdr observer) event))))))
383 |
(defun emigo-epc-process-available-input (connection process)
  "Dispatch every complete message buffered for PROCESS.
Messages are read from the process buffer one at a time and sent as
signals on the channel of CONNECTION.  An error raised while
dispatching a message is logged and does not stop the processing
of the remaining input."
  (with-current-buffer (process-buffer process)
    (while (emigo-epc-net-have-input-p)
      (let ((event (emigo-epc-net-read-or-lose process))
            (ok nil))
        (emigo-epc-log "<< RECV [%S]" event)
        (unwind-protect
            ;; The condition name must not be quoted: `'error' reads as
            ;; the condition list (quote error), which also matches a
            ;; condition called `quote' and is rejected by the
            ;; byte-compiler.
            (condition-case err
                (progn
                  (apply #'emigo-epc-signal-send
                         (emigo-epc-connection-channel connection) event)
                  (setq ok t))
              (error (emigo-epc-log "MsgError: %S / <= %S" err event)))
          ;; Dispatch did not complete (error or non-local exit): keep
          ;; draining the buffer so later messages are not stuck.
          (unless ok
            (emigo-epc-process-available-input connection process)))))))
400 |
(defun emigo-epc-net-have-input-p ()
  "Return non-nil if the current buffer holds a complete message.
Point is moved to the beginning of the buffer.  A message is
complete when the buffer contains the six-character length header
plus at least as many characters as the header announces."
  (let ((size (buffer-size)))
    (goto-char (point-min))
    (and (>= size 6)
         (<= (emigo-epc-net-decode-length) (- size 6)))))
406 |
(defun emigo-epc-net-read-or-lose (_process)
  "Read one message with `emigo-epc-net-read'.
If reading fails, enter the debugger with the error and then
signal a \"net-read error\" carrying the original error."
  (condition-case err
      (emigo-epc-net-read)
    (error
     (debug 'error err)
     (error "net-read error: %S" err))))
413 |
(defun emigo-epc-net-read ()
  "Read a message from the network buffer.
The message at the beginning of the current buffer is decoded as
`utf-8-unix', parsed with `read' and returned.  On success the
header and the payload are removed from the buffer."
  (goto-char (point-min))
  (let* ((payload-length (emigo-epc-net-decode-length))
         (payload-start (+ 6 (point)))
         (payload-end (+ payload-start payload-length)))
    (cl-assert (cl-plusp payload-length))
    (let ((message (read (decode-coding-string
                          (buffer-substring payload-start payload-end)
                          'utf-8-unix))))
      (delete-region (point-min) payload-end)
      message)))
427 |
(defun emigo-epc-net-decode-length ()
  "Read a 24-bit hex-encoded integer from buffer.
The six characters following point are parsed as a hexadecimal
number.  Point is not moved."
  (let* ((from (point))
         (digits (buffer-substring-no-properties from (+ from 6))))
    (string-to-number digits 16)))
431 |
(defun emigo-epc-prin1-to-string (sexp)
  "Like `prin1-to-string' but don't octal-escape non-ascii characters.
SEXP is printed with `print-escape-nonascii',
`print-escape-newlines', `print-length' and `print-level' all
bound to nil, which is more compatible with the CL reader."
  (let ((print-escape-nonascii nil)
        (print-escape-newlines nil)
        (print-length nil)
        (print-level nil))
    (with-temp-buffer
      (let ((standard-output (current-buffer)))
        (prin1 sexp))
      (buffer-string))))
442 |
(cl-defstruct emigo-epc-manager
  "Root object that holds all information related to an EPC activity.

In this file, instances are created by `emigo-epc-server-accept'.

title          : instance name for displaying on the `emigo-epc-controller' UI
server-process : process object for the peer
commands       : a list of (prog . args)
port           : port number
connection     : emigo-epc-connection instance
methods        : list of `emigo-epc-method' objects served to the peer
sessions       : alist of session (id . deferred)
exit-hooks     : functions for after shutdown EPC connection"
  title server-process commands port connection methods sessions exit-hooks)
457 |
(cl-defstruct emigo-epc-method
  "Object to hold serving method information.

name       : method name (symbol)   ex: 'test
task       : method function; it is applied to the argument list
             sent by the peer
docstring  : docstring (one string) ex: \"A test function. Return sum of A,B,C and D\"
arg-specs  : arg-specs (one string) ex: \"(A B C D)\"
"
  name task docstring arg-specs)
467 |
(defvar emigo-epc-live-connections nil
  "[internal] A list of `emigo-epc-manager' objects.
Those objects are currently connected to the EPC peer: a manager is
added by `emigo-epc-init-epc-layer' and removed by
`emigo-epc-stop-epc'.
This variable is for debugging purposes.")
472 |
(defun emigo-epc-server-process-name (uid)
  "Return the server process name for the identifier UID."
  (concat "emigo-epc-server:" (format "%s" uid)))

(defun emigo-epc-server-buffer-name (uid)
  "Return the name of the hidden server buffer for the identifier UID."
  (let ((process-name (emigo-epc-server-process-name uid)))
    (concat " *" (format "%s" process-name) "*")))
478 |
(defun emigo-epc-stop-epc (mngr)
  "Shut down the EPC activity described by MNGR.
The connection is disconnected first.  Pending output of the
server process is then accepted for up to
`emigo-epc-accept-process-timeout' milliseconds, the process is
killed if it is still running and its buffer is killed.  Finally
MNGR is removed from `emigo-epc-live-connections'."
  (let* ((server-proc (emigo-epc-manager-server-process mngr))
         (server-buf (if server-proc (process-buffer server-proc))))
    (emigo-epc-disconnect (emigo-epc-manager-connection mngr))
    (when server-proc
      (accept-process-output server-proc 0 emigo-epc-accept-process-timeout t)
      (if (equal 'run (process-status server-proc))
          (kill-process server-proc)))
    (if server-buf (kill-buffer server-buf))
    (setq emigo-epc-live-connections
          (delete mngr emigo-epc-live-connections))))
491 |
(defun emigo-epc-args (args)
  "[internal] Return ARGS itself if it is an atom, else its second element."
  (if (consp args)
      (car (cdr args))
    args))
497 |
(defun emigo-epc-init-epc-layer (mngr)
  "[internal] Install the protocol handlers on the connection of MNGR.
One observer is registered on the connection channel for each
incoming message type (`call', `return', `return-error',
`epc-error' and `methods').  MNGR is then added to
`emigo-epc-live-connections' and returned."
  (let* ((conn (emigo-epc-manager-connection mngr))
         (channel (emigo-epc-connection-channel conn)))
    ;; The handlers are real closures over MNGR (this file uses
    ;; lexical binding).  Building them as backquoted lambda lists
    ;; with the manager spliced in would leave them as uncompiled
    ;; data that is interpreted on every message.
    (dolist (entry
             (list
              (cons 'call
                    (lambda (args)
                      (emigo-epc-log "SIG CALL: %S" args)
                      (apply #'emigo-epc-handler-called-method
                             mngr (emigo-epc-args args))))
              (cons 'return
                    (lambda (args)
                      (emigo-epc-log "SIG RET: %S" args)
                      (apply #'emigo-epc-handler-return
                             mngr (emigo-epc-args args))))
              (cons 'return-error
                    (lambda (args)
                      (emigo-epc-log "SIG RET-ERROR: %S" args)
                      (apply #'emigo-epc-handler-return-error
                             mngr (emigo-epc-args args))))
              (cons 'epc-error
                    (lambda (args)
                      (emigo-epc-log "SIG EPC-ERROR: %S" args)
                      (apply #'emigo-epc-handler-epc-error
                             mngr (emigo-epc-args args))))
              (cons 'methods
                    (lambda (args)
                      (emigo-epc-log "SIG METHODS: %S" args)
                      ;; ARGS is (methods (UID)); hand over the UID.
                      (emigo-epc-handler-methods mngr (caadr args))))))
      (emigo-epc-signal-connect channel (car entry) (cdr entry)))
    (push mngr emigo-epc-live-connections)
    mngr))
529 |
(defun emigo-epc-manager-send (mngr method &rest messages)
  "[internal] Low-level message sending.
Send the list (METHOD . MESSAGES) over the connection of MNGR."
  (emigo-epc-net-send (emigo-epc-manager-connection mngr)
                      (cons method messages)))
534 |
(defun emigo-epc-manager-get-method (mngr method-name)
  "[internal] Return the method object of MNGR named METHOD-NAME.
Names are compared with `eq'.  Return nil when no method matches."
  (cl-find method-name
           (emigo-epc-manager-methods mngr)
           :key #'emigo-epc-method-name
           :test #'eq))
540 |
(defun emigo-epc-handler-methods (mngr uid)
  "[internal] Answer the request UID with the methods of MNGR.
The reply is a `return' message whose value is a list of
\(NAME ARG-SPECS DOCSTRING) entries; a missing arg-specs or
docstring is sent as the empty string."
  (let ((info (mapcar (lambda (method)
                        (list (emigo-epc-method-name method)
                              (or (emigo-epc-method-arg-specs method) "")
                              (or (emigo-epc-method-docstring method) "")))
                      (emigo-epc-manager-methods mngr))))
    (emigo-epc-manager-send mngr 'return uid info)))
551 |
(defun emigo-epc-handler-called-method (mngr uid name args)
  "[internal] Low-level message handler for the peer's calls.
Look up the method NAME on MNGR and apply its task to ARGS.  The
result is sent back as a `return' message for the call id UID; when
the task returns a deferred object, the reply is sent once that
object delivers its value.  An unknown method is answered with an
`epc-error' message, and an error raised while running the task
or sending its result is answered with a `return-error' message."
  (let ((method (emigo-epc-manager-get-method mngr name)))
    (cond
     ((null method)
      (emigo-epc-log "ERR: No such method : %s" name)
      (emigo-epc-manager-send
       mngr 'epc-error uid (format "EPC-ERROR: No such method : %s" name)))
     (t
      (condition-case err
          (let* ((f (emigo-epc-method-task method))
                 (ret (apply f args)))
            (cond
             ((emigo-deferred-object-p ret)
              (emigo-deferred-nextc ret
                (lambda (xx) (emigo-epc-manager-send mngr 'return uid xx))))
             (t (emigo-epc-manager-send mngr 'return uid ret))))
        (error
         ;; Include method name and args in error for debugging
         (let ((err-msg (format "FAILED in %s: %S with ERROR: %S" name args err)))
           ;; Log through an explicit "%s": ERR-MSG may contain `%'
           ;; (from ARGS or the error data) and must not be interpreted
           ;; as a format string, otherwise the logger signals here and
           ;; the peer never receives its `return-error' reply.
           (emigo-epc-log "%s" err-msg)
           (emigo-epc-manager-send mngr 'return-error uid err-msg))))))))
575 |
(defun emigo-epc-manager-remove-session (mngr uid)
  "[internal] Remove the session UID from the sessions of MNGR.
Session ids are compared with `eq'.  The remaining sessions are
stored back in reversed order.  Return nil."
  (let ((kept nil))
    (dolist (session (emigo-epc-manager-sessions mngr))
      (unless (eq uid (car session))
        (push session kept)))
    (setf (emigo-epc-manager-sessions mngr) kept)
    nil))
584 |
(defun emigo-epc-handler-return (mngr uid args)
  "[internal] Low-level message handler for normal returns.
Find the session UID of MNGR, remove it and run its deferred
object with the value ARGS.  An unknown UID is only logged."
  (let ((session (assq uid (emigo-epc-manager-sessions mngr))))
    (if (null session)
        (emigo-epc-log "RET: NOT FOUND: id:%s [%S]" uid args)
      (emigo-epc-log "RET: id:%s [%S]" uid args)
      (emigo-epc-manager-remove-session mngr uid)
      (emigo-deferred-callback (cdr session) args))))
595 |
(defun emigo-epc-handler-return-error (mngr uid args)
  "[internal] Low-level message handler for application errors.
Find the session UID of MNGR, remove it and fail its deferred
object with a string describing ARGS.  An unknown UID is only
logged."
  ;; The binding lists of `let' and `let*' must be closed before the
  ;; body forms; otherwise `cond', `when' and the errorback call are
  ;; read as malformed variable bindings and the function signals
  ;; instead of handling the message.
  (let ((pair (assq uid (emigo-epc-manager-sessions mngr))))
    (cond
     (pair
      (emigo-epc-log "RET-ERR: id:%s [%S]" uid args)
      (emigo-epc-manager-remove-session mngr uid)
      (let ((err-str (format "%S" args)))
        ;; Add context about the failed call if available
        (when (and (listp args) (eq (car args) 'error))
          (setq err-str (format "EPC call failed: %S" args)))
        (emigo-deferred-errorback (cdr pair) err-str)))
     (t                                 ; error
      (emigo-epc-log "RET-ERR: NOT FOUND: id:%s [%S]" uid args)))))
610 |
(defun emigo-epc-handler-epc-error (mngr uid args)
  "[internal] Low-level message handler for EPC errors.
Find the session UID of MNGR, remove it and fail its deferred
object with the list (epc-error ARGS).  An unknown UID is only
logged."
  (let ((session (assq uid (emigo-epc-manager-sessions mngr))))
    (if (null session)
        (emigo-epc-log "RET-EPC-ERR: NOT FOUND: id:%s [%S]" uid args)
      (emigo-epc-log "RET-EPC-ERR: id:%s [%S]" uid args)
      (emigo-epc-manager-remove-session mngr uid)
      (emigo-deferred-errorback (cdr session) (list 'epc-error args)))))
621 |
(defun emigo-epc-call-deferred (mngr method-name args)
  "Call the peer's method METHOD-NAME with ARGS asynchronously.
A new session is recorded on MNGR and a `call' message is sent.
Return the deferred object of that session; the return handlers
run it when the peer answers."
  (let ((uid (emigo-epc-uid))
        (d (emigo-deferred-new)))
    (setf (emigo-epc-manager-sessions mngr)
          (cons (cons uid d) (emigo-epc-manager-sessions mngr)))
    (emigo-epc-manager-send mngr 'call uid method-name args)
    d))
632 |
(defun emigo-epc-define-method (mngr method-name task &optional arg-specs docstring)
  "Register TASK on MNGR as the method METHOD-NAME callable by the peer.
ARG-SPECS and DOCSTRING are optional descriptions stored with the
method.  The new method is put in front of the existing ones.
Return the created `emigo-epc-method' object."
  (let ((method (make-emigo-epc-method
                 :name method-name
                 :task task
                 :arg-specs arg-specs
                 :docstring docstring)))
    (push method (emigo-epc-manager-methods mngr))
    method))
641 |
(defun emigo-epc-sync (mngr d)
  "Wait synchronously for the deferred object D and return its result.
While waiting, output from the connection process of MNGR is
accepted in slices of `emigo-epc-accept-process-timeout'
milliseconds.  If D fails, the error is signaled again in the
caller."
  (let ((result 'emigo-epc-nothing))
    (emigo-deferred-error
        (emigo-deferred-nextc d
          (lambda (x) (setq result x)))
      (lambda (er) (setq result (cons 'error er))))
    (while (eq result 'emigo-epc-nothing)
      (save-current-buffer
        (accept-process-output
         (emigo-epc-connection-process (emigo-epc-manager-connection mngr))
         0 emigo-epc-accept-process-timeout t)))
    ;; NOTE(review): a successful result that is itself a cons whose
    ;; car is `error' cannot be told apart from a failure here.
    (if (and (consp result) (eq 'error (car result)))
        ;; The payload is whatever reached the errorback: usually a
        ;; condition list such as (error "msg") or (epc-error ...), or
        ;; a plain string.  Handing it to `error' would treat it as a
        ;; format string, which fails for lists and misreads `%'.
        (emigo-deferred-resignal (cdr result))
      result)))
659 |
(defun emigo-epc-call-sync (mngr method-name args)
  "Call the peer's method METHOD-NAME with ARGS and wait for the result.
The call is issued with `emigo-epc-call-deferred' and awaited
with `emigo-epc-sync', which signals an error if the call fails."
  (let ((pending (emigo-epc-call-deferred mngr method-name args)))
    (emigo-epc-sync mngr pending)))
664 |
(defun emigo-epc-live-p (mngr)
  "Return non-nil when MNGR is an EPC manager with a live connection.
Any error raised while fetching the connection process from MNGR is
treated as \"not live\"."
  (let ((proc (condition-case nil
                  (emigo-epc-connection-process
                   (emigo-epc-manager-connection mngr))
                (error nil))))
    (when (processp proc)
      ;; Same as `process-live-p' in Emacs >= 24:
      (memq (process-status proc) '(run open listen connect stop)))))
673 |
674 | ;; epcs
(defvar emigo-epc-server-client-processes nil
  "[internal] A list of ([process object] . [`emigo-epc-manager' instance]).
When the server process accepts a client connection, an
`emigo-epc-manager' instance is created and stored in this
variable; `emigo-epc-server-sentinel' removes the entry again when
the client disconnects.  This variable is used for management
purposes.")
681 |
;; emigo-epc-server
;;
;; Bookkeeping record for one listening server, created by
;; `emigo-epc-server-start'.
;;
;; name             : process name (string) ex: "EPC Server 1"
;; process          : server process object
;; port             : port number
;; connect-function : initialize function for `emigo-epc-manager' instances
(cl-defstruct emigo-epc-server name process port connect-function)
688 |
(defvar emigo-epc-server-processes nil
  "[internal] A list of ([process object] . [`emigo-epc-server' instance]).
`emigo-epc-server-start' adds an entry for every server it starts.
This variable is used for management purposes.")
692 |
(defun emigo-epc-server-get-manager-by-process (proc)
  "[internal] Return the `emigo-epc-manager' instance registered for PROC.
The lookup goes through `emigo-epc-server-client-processes' and
compares processes with `eql'.  Return nil when PROC is unknown."
  (cl-dolist (entry emigo-epc-server-client-processes)
    (when (eql (car entry) proc)
      (cl-return (cdr entry)))))
699 |
(defun emigo-epc-server-accept (process)
  "[internal] Set up the accepted client PROCESS for EPC traffic.
Wrap PROCESS and its buffer in a connection object, switch the
process to binary coding, install the EPC filter and sentinel,
clear its query-on-exit flag and return a new `emigo-epc-manager'
for the connection."
  (emigo-epc-log "EMIGO-EPC-SERVER- >> Connection accept: %S" process)
  (let* ((conn-name (format "emigo-epc con %s" (emigo-epc-uid)))
         (conn (make-emigo-epc-connection
                :name conn-name
                :process process
                :buffer (process-buffer process)
                :channel (list conn-name nil))))
    (emigo-epc-log "EMIGO-EPC-SERVER- >> Connection establish")
    (set-process-coding-system process 'binary 'binary)
    (set-process-filter
     process
     (lambda (proc chunk) (emigo-epc-process-filter conn proc chunk)))
    (set-process-query-on-exit-flag process nil)
    (set-process-sentinel
     process
     (lambda (proc event) (emigo-epc-process-sentinel conn proc event)))
    (make-emigo-epc-manager :server-process process
                            :port t
                            :connection conn)))
722 |
(defun emigo-epc-server-sentinel (process message connect-function)
  "[internal] Process sentinel handler for the server process.
PROCESS is the process the status MESSAGE is about.  A MESSAGE
containing \"open\" for a process without a manager is a new client:
a manager is created, registered and initialized, then
CONNECT-FUNCTION (if non-nil) is called with it and the manager is
returned.  Any other message for a registered process is treated
as a disconnect: the manager is stopped and unregistered.
Messages for unknown processes are ignored."
  (emigo-epc-log "EMIGO-EPC-SERVER- SENTINEL: %S %S" process message)
  (let ((mngr (emigo-epc-server-get-manager-by-process process)))
    (cond
     ;; new connection
     ;; `string-match-p' keeps the caller's match data intact, which
     ;; matters because sentinels run asynchronously.
     ((and (string-match-p "open" message) (null mngr))
      ;; The condition name in the handler must be the bare symbol
      ;; `error'; a quoted name is rejected by the byte-compiler.
      (condition-case err
          (let ((mngr (emigo-epc-server-accept process)))
            (push (cons process mngr) emigo-epc-server-client-processes)
            (emigo-epc-init-epc-layer mngr)
            (when connect-function (funcall connect-function mngr))
            mngr)
        (error
         (emigo-epc-log "EMIGO-EPC-SERVER- Protocol error: %S" err)
         (emigo-epc-log "EMIGO-EPC-SERVER- ABORT %S" process)
         ;; Do not keep a registry entry for a process that is about
         ;; to be deleted.
         (setq emigo-epc-server-client-processes
               (assq-delete-all process emigo-epc-server-client-processes))
         (delete-process process))))
     ;; ignore
     ((null mngr) nil)
     ;; disconnect
     (t
      (let ((pair (assq process emigo-epc-server-client-processes)))
        (when pair
          (emigo-epc-log "EMIGO-EPC-SERVER- DISCONNECT %S" process)
          (emigo-epc-stop-epc (cdr pair))
          (setq emigo-epc-server-client-processes
                (assq-delete-all process emigo-epc-server-client-processes))))
      nil))))
752 |
(defun emigo-epc-server-start (connect-function &optional port)
  "Start a TCP server on 127.0.0.1 and return its main process object.
PORT is the service to listen on; when nil, t is passed as the
service.  CONNECT-FUNCTION is handed to
`emigo-epc-server-sentinel' together with every status change of
the server's processes.  The server is also recorded in
`emigo-epc-server-processes'."
  (let* ((server-name (format "EMIGO EPC Server %s" (emigo-epc-uid)))
         (server-buf (emigo-epc-make-procbuf (format " *%s*" server-name)))
         (sentinel (lambda (process message)
                     (emigo-epc-server-sentinel process message connect-function)))
         (main-process (make-network-process
                        :name server-name
                        :buffer server-buf
                        :family 'ipv4
                        :server t
                        :host "127.0.0.1"
                        :service (or port t)
                        :noquery t
                        :sentinel sentinel))
         (server (make-emigo-epc-server
                  :name server-name
                  :process main-process
                  :port (process-contact main-process :service)
                  :connect-function connect-function)))
    (push (cons main-process server) emigo-epc-server-processes)
    main-process))
778 |
779 | (provide 'emigo-epc)
780 | ;;; emigo-epc.el ends here
781 |
--------------------------------------------------------------------------------