├── .gitignore
├── LICENSE
├── README.md
├── agent
├── Dockerfile
├── README.md
├── backtrack_stems.txt
├── block_generators.py
├── block_linters.py
├── bootstraps
│ ├── agent_django_interface_bootstrap.py
│ ├── bootstrap.py
│ ├── browser_tool_bootstrap.py
│ ├── discord_bootstrap.py
│ ├── discord_bootstrap_jdp.py
│ ├── dome39_scifi_bootstrap.py
│ ├── example_blocks
│ │ ├── discord_evaluation1.json
│ │ ├── discord_evaluation1.py
│ │ ├── discord_observation_inference1.json
│ │ ├── discord_observation_inference1.py
│ │ ├── discord_observation_inference2.json
│ │ ├── discord_observation_inference2.py
│ │ ├── discord_orientation1.json
│ │ ├── discord_orientation1.py
│ │ ├── discord_orientation2.json
│ │ ├── discord_orientation2.py
│ │ ├── haunted_mansion_orientation1.json
│ │ ├── haunted_mansion_orientation1.py
│ │ ├── nano_editor1.json
│ │ ├── nano_editor1.py
│ │ ├── nano_editor_action.json
│ │ ├── nano_editor_action.py
│ │ ├── nethack_action1.json
│ │ ├── nethack_action1.py
│ │ ├── nethack_action2.json
│ │ ├── nethack_action2.py
│ │ ├── nethack_action3.json
│ │ ├── nethack_action3.py
│ │ ├── nethack_action4.json
│ │ ├── nethack_action4.py
│ │ ├── nethack_action5.json
│ │ ├── nethack_action5.py
│ │ ├── nethack_check_inventory.json
│ │ ├── nethack_check_inventory.py
│ │ ├── nethack_evaluation1.json
│ │ ├── nethack_evaluation1.py
│ │ ├── nethack_orientation1.json
│ │ ├── nethack_orientation1.py
│ │ ├── nethack_orientation2.json
│ │ ├── nethack_orientation2.py
│ │ ├── nethack_orientation3.json
│ │ ├── nethack_orientation3.py
│ │ ├── nethack_pick_up_item.json
│ │ ├── nethack_pick_up_item.py
│ │ ├── rabbit_hole_refocus1.json
│ │ ├── rabbit_hole_refocus1.py
│ │ ├── send_discord_message.json
│ │ ├── send_discord_message.py
│ │ ├── send_discord_message2.json
│ │ ├── send_discord_message2.py
│ │ ├── send_discord_message3.json
│ │ ├── send_discord_message3.py
│ │ ├── weave_editor_append.json
│ │ ├── weave_editor_append.py
│ │ ├── weave_editor_diff_edit.json
│ │ └── weave_editor_diff_edit.py
│ ├── haunted_mansion_bootstrap.py
│ ├── haunted_mansion_bootstrap_emacs.py
│ ├── haunted_mansion_bootstrap_nano.py
│ ├── nano_exercises
│ │ └── prompt.txt
│ ├── nano_exercises_bootstrap.py
│ ├── nano_practice_bootstrap.py
│ ├── nano_practice_bootstrap_easy.py
│ ├── nethack_bootstrap.py
│ ├── nethack_bootstrap_rooms.py
│ ├── nethack_bootstrap_rooms2.py
│ ├── nethack_discord_bootstrap.py
│ ├── pure_ocaml_bootstrap.py
│ ├── simple_convo_bootstrap.py
│ ├── sir_rowland_hill_bootstrap.py
│ ├── skim_retroinstruct_data_guide.py
│ ├── test_tictactoe_server.py
│ ├── test_vigenere.py
│ ├── tictactoe_bootstrap.py
│ ├── tictactoe_server.py
│ ├── tictactoe_single_bootstrap.py
│ ├── tictactoe_single_discord_bootstrap.py
│ ├── vigenere.py
│ ├── vigenere_bootstrap.py
│ ├── vigenere_bootstrap_nano.py
│ ├── wiki_citation_bootstrap.py
│ └── zombie_game_bootstrap.py
├── cache_hf.py
├── docs
│ ├── 2024-12-29-how-to-write-a-bootstrap-file.md
│ └── weave_agent_vs_react.png
├── embed_server.py
├── error_stems.txt
├── eval_rubrics
│ ├── action.txt
│ ├── backtrack.txt
│ ├── debug.txt
│ ├── evaluation.txt
│ ├── expectation.txt
│ ├── observation-inference.txt
│ ├── orientation.txt
│ └── task-inference.txt
├── long_backtrack_stems.txt
├── planner.py
├── prepare_rl_set_from_traces.py
├── python.lark
├── render_agent_trace.py
├── render_block.py
├── reproduce_vllm_bug_partial_utf8.py
├── requirements.txt
├── retrieval.py
├── run_without_errors_questions.txt
├── scratch.txt
├── sleep.py
├── templates
│ ├── describe1.txt
│ └── describe2.txt
├── test_block_extractors.py
├── test_block_linters.py
├── test_rl_reward_penalties.py
├── test_weave_kanban.py
├── tools
│ ├── atspi_desktop.py
│ ├── discord.py
│ ├── editor.py
│ ├── editor_test.txt
│ ├── emacs.py
│ ├── fuzz1.txt
│ ├── nano.py
│ ├── nethack.py
│ ├── test_diffs.py
│ ├── weave_zombie_game.py
│ └── zombie_game.py
├── trainer
│ ├── linear_4bit_sharded.py
│ ├── linear_4bit_sharded_test.py
│ ├── mistral3_ring_attn_test.py
│ ├── mixtral_ring_attn_test.py
│ ├── patch_model.py
│ ├── qwen2_ring_attn_test.py
│ ├── ring_attn.py
│ ├── ring_attn_test.py
│ ├── trainer.py
│ ├── trainer_preference.py
│ └── trainer_val_loss.py
├── tuning_preprocess.py
├── weave.py
└── weave_agent.py
├── dataset.py
├── diff_match_patch.py
├── dpo_loss.py
├── examples
├── bayes_evaluate_weave_test_cases.py
├── bayes_lorem_ipsum_rubric.txt
└── lorem_ipsum_cases.txt
├── find_ae_scale.py
├── hermes
├── hermes_constitution.txt
└── hermes_prompts.txt
├── loom
├── index.html
├── main.js
├── package.json
├── prompts
│ ├── rewrite.txt
│ └── summarize.txt
├── prose_samples.json
└── renderer.js
├── loom_tree.py
├── lora_tune.py
├── minihf_infer.py
├── multi_q_mcts.py
├── requirements.txt
├── rlaif_generator.py
├── rlaif_generator_dpo.py
├── setup.sh
├── sft_evaluator.py
├── sft_generator.py
├── static
├── minihf.html
├── minihf_logo_text.png
├── minihf_training.mermaid
└── minihf_training_diagram.png
├── train_vae_overlap.py
├── train_vae_router.py
├── vae_infer.py
└── weave.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *~
3 |
--------------------------------------------------------------------------------
/agent/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use an official Python runtime as a parent image
2 | FROM python:3.10-slim
3 |
4 | # Set the working directory in the container
5 | WORKDIR /app
6 | RUN mkdir /app/weave-agent-logs/
7 |
8 | # Copy only the requirements.txt file
9 | COPY requirements.txt /app/
10 |
11 | # Install any needed packages specified in requirements.txt
12 | RUN pip download pip setuptools wheel --dest=package-cache
13 | RUN pip install --no-index --find-links=package-cache pip setuptools
14 | RUN pip download -r requirements.txt --dest=package-cache
15 | RUN pip install -r requirements.txt --no-index --find-links=package-cache
16 | RUN pip install django
17 |
18 | # Install tmux and nethack
19 | RUN apt-get clean # NOTE(review): cleaning before anything is installed is a no-op on a fresh slim image
20 | RUN apt-get autoclean
21 | RUN apt-get autoremove
22 | RUN apt-get update && apt-get upgrade -y
23 | RUN apt-get update && apt-get install -y tmux
24 | RUN apt-get update && apt-get install -y nethack-console
25 | RUN apt-get update && apt-get install -y emacs nano
26 |
27 | # Install Firefox and geckodriver
28 | RUN apt-get update && apt-get install -y wget
29 | RUN apt-get update && apt-get install -y firefox-esr
30 | RUN wget https://github.com/mozilla/geckodriver/releases/download/v0.29.1/geckodriver-v0.29.1-linux64.tar.gz
31 | RUN tar -xvzf geckodriver-v0.29.1-linux64.tar.gz -C /usr/local/bin/
32 | RUN chmod +x /usr/local/bin/geckodriver
33 |
34 | # Cache HuggingFace
35 | COPY hf_token.txt /app/
36 | COPY cache_hf.py /app/
37 | ENV MODEL "weave-agent"
38 | ENV TOKENIZER "a-m-team/AM-Thinking-v1"
39 | #ENV TOKENIZER "Qwen/QwQ-32B"
40 | #ENV MODEL "mistralai/Mistral-Small-3.1-24B-Base-2503"
41 | #ENV TOKENIZER "mistralai/Mistral-Small-3.1-24B-Base-2503"
42 | #ENV MODEL "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
43 | #ENV TOKENIZER "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
44 | RUN python3 cache_hf.py $TOKENIZER
45 | RUN rm /app/hf_token.txt # NOTE(review): the token still persists in the image layer created by the COPY above; use a BuildKit secret mount (--mount=type=secret) to keep it out of image history
46 |
47 | # Copy the current directory contents into the container at /app
48 | COPY . /app
49 |
50 | # Make port 5001 available to the world outside this container
51 | EXPOSE 5001
52 |
53 | # Define environment variable
54 | ENV NAME World
55 |
56 | # Run the agent script when the container launches
57 | CMD python weave_agent.py --port 5001 --bootstrap "bootstraps/nano_exercises_bootstrap.py" $MODEL --tokenizer $TOKENIZER & python -m http.server 8991 --directory "/app/weave-agent-logs/"
58 |
--------------------------------------------------------------------------------
/agent/backtrack_stems.txt:
--------------------------------------------------------------------------------
1 | Why did I assign a low score to this action during selection? It
2 | Hm, something about this action feels off. Maybe
3 | The best action I could come up with was below the score threshold, is something wrong with it?
4 | Something about the selected block makes me uncomfortable.
5 | I have a hunch that's not the best action I could take.
6 | I gave this action a low score because
7 | So what alternative action could I write that would pass the score threshold?
8 | Let's reflect on why this action doesn't meet my standards.
9 | How could I improve on this action?
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/bootstrap.py:
--------------------------------------------------------------------------------
1 | def bootstrap_callback(agent):  # Register a reminder and a scratch.txt observation view on the agent.
2 | # Set up a reminder
3 | agent.add_reminder({
4 | 'type': 'reminder',
5 | 'trigger_callback': lambda agent: simple_evaluate_outputs([make_simple_score_prompt("Is it time to remind the agent?"),], agent.context),  # NOTE(review): simple_evaluate_outputs / make_simple_score_prompt are not imported in this file -- presumably injected into the bootstrap exec namespace; verify
6 | 'reminder_callback': lambda agent: agent.add_block({'type': 'reminder', 'message': 'This is a reminder'}),
7 | 'trigger_type': 'yes_no_logit',  # trigger decided by a yes/no logit score rather than a fixed schedule
8 | 'question': 'Is it time to remind the agent?',
9 | 'threshold': 0.5  # scores above this fire the reminder
10 | })
11 |
12 | # Set up an observation view that reads from scratch.txt
13 | def read_scratch_file(agent):
14 | try:
15 | with open('scratch.txt', 'r') as file:
16 | content = file.read()
17 | return content
18 | except Exception as e:
19 | agent.add_error_block(f"Failed to read scratch.txt: {e}")  # surface the failure in the agent trace instead of raising
20 | return ""  # degrade to an empty observation on read failure
21 |
22 | agent.add_observation_view({
23 | 'type': 'observation',
24 | 'title': 'scratch.txt',
25 | 'callback': read_scratch_file  # invoked each tick to render the file contents into context
26 | })
27 |
28 | bootstrap_callback(agent)  # NOTE(review): `agent` is not defined in this file -- provided by the framework when the bootstrap is exec'd; verify
29 |
--------------------------------------------------------------------------------
/agent/bootstraps/discord_bootstrap.py:
--------------------------------------------------------------------------------
1 | import time
2 | import json
3 | import asyncio  # NOTE(review): asyncio appears unused in this file -- confirm before removing
4 | from tools.discord import WeaveDiscordClient
5 |
6 | #startblock type: orientation
7 | #timestamp 1724982545.6534579
8 | """
9 | WEAVER [P: EXPECTATION], I need to create a Discord bot that will interact with users in a specified channel.
10 | The bot should continue interacting until a user sends the message "You are dismissed."
11 |
12 | WEAVER [P: CLARIFICATION], How do I set up the Discord bot?
13 |
14 | WEAVER [P: EXPOSITION], You can set up the Discord bot using the provided Discord tool.
15 | This involves creating a Discord application, inviting the bot to the server, and configuring the bot with the necessary permissions.
16 |
17 | WEAVER [P: RATIONAL], The bot should be able to send and receive messages in the specified channel.
18 | It should also be able to react to messages and reply to specific messages if needed.
19 |
20 | WEAVER [P: CONCLUSION], I will create the main subagent to handle the interaction with the Discord bot.
21 | The bot will continue interacting with users until it receives the message "You are dismissed."
22 | """
23 | #endblock
24 |
25 | #startblock type: action
26 | #timestamp 1724982671.1163404
27 | """
28 | I'll set up the main agent to handle the interaction with the Discord bot.
29 | The bot will continue interacting with users until it receives the message "You are dismissed."
30 | """
31 |
32 | def create_main_agent_and_connect_discord(subagent):  # Builds the "main" subagent and attaches a live Discord client to it.
33 | def check_dismissed(subagent):  # Task evaluation: polls the local Discord bridge for a dismissal message.
34 | """Check if the bot has been dismissed by a user."""
35 | response = requests.post("http://localhost:8080", json={"action": "get_messages"})  # NOTE(review): requests is never imported in this file -- presumably injected into the bootstrap exec namespace; verify
36 | messages = response.json()
37 | for msg in messages:
38 | if msg["content"] == "You are dismissed." and msg["author"] != "weave-agent":  # only count dismissals from someone other than the bot itself
39 | return True
40 | return False
41 |
42 | schema = {"dismissed": "boolean"}
43 | main_agent = agent.subagent("main", None, "Interact with users until dismissed", schema, args.budget)  # NOTE(review): `agent` and `args` are not defined in this file -- provided by the framework when the bootstrap is exec'd; verify
44 | main_agent.task.add_evaluation("Check if dismissed", check_dismissed)
45 |
46 | # Load Discord token and channel ID from discord.json
47 | with open('discord.json') as f:
48 | config = json.load(f)
49 | token = config['key']
50 | channel_id = config['cid']
51 |
52 | # Start the Discord bot
53 | client = WeaveDiscordClient(main_agent, token, channel_id)
54 | time.sleep(5)  # give the client time to connect before sending the greeting
55 | client.send_message("Hi!")
56 |
57 | # Actions taken by the main subagent should look like:
58 | #startblock type: action
59 | # def respond(subagent):
60 | # # Take the CID from the observation callback window
61 | # #title WeaveDiscordClient (discord-bot-CID)
62 | # # Extract the CID, which will be a number string from the above title
63 | # # Then use it to retrieve the discord client like below:
64 | # client = self.tools["discord-bot-CID"]
65 | # client.send_message("Hello!")
66 | # self.add_action("Respond to the user", respond)
67 | #endblock
68 | # You don't have to re-initialize the Discord client or anything like that.
69 |
70 | return True
71 |
72 | self.add_action("Create main agent and connect to Discord", create_main_agent_and_connect_discord)  # NOTE(review): `self` is not defined at module level -- provided by the bootstrap exec context; verify
73 | #endblock
74 |
75 | #startblock type: expectation
76 | #timestamp 1724982799.3917813
77 | """
78 | The action should succeed if the Discord bot connects to the channel and starts interacting with users.
79 | The action should fail if the bot does not connect to the channel or fails to interact with users.
80 | """
81 | #endblock
82 |
83 | #startblock type: evaluation
84 | #timestamp 1724983062.124238
85 |
86 | def check_bot_connected(subagent):  # Evaluation: a 200 from the local Discord bridge is treated as "connected".
87 | """Check if the bot has connected to the Discord channel."""
88 | response = requests.post("http://localhost:8080", json={"action": "get_messages"})  # NOTE(review): relies on the same exec-context `requests` as above
89 | if response.status_code == 200:
90 | return True
91 | return False
92 |
93 | self.add_evaluation(
94 | "Check bot connected",
95 | check_bot_connected
96 | )
97 | #endblock
98 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_evaluation1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "evaluation",
3 | "q": "Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block?",
4 | "score": 3.0,
5 | "index": 42,
6 | "timestamp": 1737490568.8009431,
7 | "time_remaining": 20415.256756067276,
8 | "bm25_query": "type:'evaluation' +weave-agent +framework +execute +code +agent +subagent +send +message +respond +personal +thought +internal +monologue +interaction +query +observation +callback +tools +discord +client +context +expectation +outcome +schema +validation +refine +response +user +message +error +execute +tick +execute-program +interaction-flow +goal-resolution +action-validation +bm25 +verify +parsed +messages +refinement +personal-thoughts +internal-experience +firsthand-view +chatbot +dialogue-loop +user-insight +query-response +personal-reflection +feedback",
9 | "description": "An evaluation block that defines a function 'check_reflective_response' to check if the bot's response to federovist's query is reflective and personal. The function retrieves the bot's recent messages using the discord client tool, then checks for the presence of reflective and personal keywords in the bot's response. If the response contains a sufficient number of reflective and personal keywords, the function returns 1, indicating a reflective response. The evaluation is added to the weave-agent with the title 'Check reflective response' and referencing the function name check_reflective_response."
10 | }
11 |
12 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_evaluation1.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: evaluation
3 | #index 42
4 | #timestamp 1737490568.8009431
5 | #time_remaining 20415.256756067276 seconds
6 | #bm25_query type:'evaluation' +weave-agent +framework +execute +code +agent +subagent +send +message +respond +personal +thought +internal +monologue +interaction +query +observation +callback +tools +discord +client +context +expectation +outcome +schema +validation +refine +response +user +message +error +execute +tick +execute-program +interaction-flow +goal-resolution +action-validation +bm25 +verify +parsed +messages +refinement +personal-thoughts +internal-experience +firsthand-view +chatbot +dialogue-loop +user-insight +query-response +personal-reflection +feedbac
7 |
8 | def check_reflective_response(subagent):  # Returns 1 if the bot's latest message reads as reflective and personal, else 0.
9 | """Check if the bot's response to federovist's query is reflective and personal."""
10 | context = subagent.tools['discord-bot-1325039818673094739'].get_messages()
11 | for msg in reversed(context):  # newest-first scan for the bot's most recent message
12 | if msg['author'] == 'weave-agent':
13 | bot_response = msg['content']
14 | break
15 | else:  # for/else: runs only when no bot-authored message was found
16 | return 0
17 | reflective_keywords = ['personal', 'first-hand', 'insights', 'feelings', 'experience', 'reflection', 'thoughts', 'perspective']
18 | personal_keywords = ['I', 'my', 'me', "I'm"]  # NOTE(review): after .lower(), 'I' matches every letter "i" and 'my'/'me' match inside other words -- these are substring counts, not word counts
19 | reflective_count = sum((bot_response.lower().count(keyword) for keyword in reflective_keywords))
20 | personal_count = sum((bot_response.lower().count(keyword) for keyword in personal_keywords))
21 | if reflective_count > 1 and personal_count > 0:  # needs at least two reflective hits and one personal hit
22 | return 1
23 | return 0
24 |
25 | self.add_evaluation('Check reflective response', check_reflective_response)
26 | #q: Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block? Yes. (73.375%)
27 | #endblock
28 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_observation_inference1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "observation-inference",
3 | "q": "Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick?",
4 | "score": 4,
5 | "index": 10,
6 | "timestamp": 1737489546.3470614,
7 | "time_remaining": 21437.71063184738,
8 | "bm25_query": "type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema",
9 | "description": "An observation-inference block that discusses the current observation callback window, which includes a callback for fetching messages from the Discord client. The block concludes that the existing observation callback is appropriate for verifying whether the bot has successfully responded to the user message, and no new observation callbacks are necessary for this action."
10 | }
11 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_observation_inference1.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: observation-inference
3 | #index 10
4 | #timestamp 1737489546.3470614
5 | #time_remaining 21437.71063184738 seconds
6 | #bm25_query type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema
7 | """
8 | The current observation callback window includes a callback for fetching messages from the Discord client.
9 | Since the observation is expected to verify whether the bot has successfully responded to the user message,
10 | the existing observation callback is appropriate and does not need to be reviewed or changed.
11 | Additionally, no new observation callbacks seem necessary for this action.
12 | """
13 | #q: Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick? Yes. (98.681%)
14 | #endblock
15 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_observation_inference2.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "observation-inference",
3 | "q": "Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick?",
4 | "score": 4.5,
5 | "index": 34,
6 | "timestamp": 1737497836.5383182,
7 | "time_remaining": 21437.71063184738,
8 | "bm25_query": "type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema",
9 | "description": "An observation-inference block that states no observation callbacks will be added or removed for the current task. The block is kept short and concise, indicating that the agent is prepared to judge the outcome of the action on the next tick without additional changes."
10 | }
11 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_observation_inference2.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: observation-inference
3 | #index 34
4 | #timestamp 1737497836.5383182
5 | #time_remaining 21437.71063184738 seconds
6 | #bm25_query type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema
7 | """
8 | I'm not adding or removing any observation callbacks for this task and I've
9 | instructed myself to keep this short.
10 | """
11 | #q: Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick? Yes. (95.239%)
12 | #endblock
13 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_orientation1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 3.5,
5 | "index": 13,
6 | "timestamp": 1737489118.9665036,
7 | "description": "An orientation block that discusses the expectation that the user would send a message, which was validated. The user replied with 'Hello.', leaving the response ambiguous. Various personas (P: EXPECTATION, P: CAUTIOUS, P: EMPIRICISM, P: RATIONAL, P: ARBITER, P: CONCLUSION) discuss different approaches to respond. The conclusion is to use the WeaveDiscordClient to send the message 'How can I help you today?' to the user, keeping the action simple and direct."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_orientation1.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1737489118.9665036
3 | """WEAVER [P: EXPECTATION], In the previous tick we wrote down an expectation
4 | that the user would send us a message if we wait. This expectation was validated
5 | because I can see in the WeaveDiscordClient that a user has replied to us. The
6 | user said 'Hello.', which leaves it ambiguous how we should reply. Ideas?
7 |
8 | WEAVER [P: CAUTIOUS], We could just wait for them to send another message and
9 | clarify their intent. That has a low risk for us.
10 |
11 | WEAVER [P: EMPIRICISM], Nah, if you look at the timestamp it's already been a few
12 | minutes since they sent that. If they were going to send us a follow up they'd have
13 | already done it.
14 |
15 | WEAVER [P: RATIONAL], We could ask them something sort of cliche and low entropy
16 | like "How are you?". That would put the ball in their court.
17 |
18 | WEAVER [P: EMPIRICISM], I think I'd prefer the direct approach. We ask them what
19 | they want from us, if anything. That would move the conversation forward faster.
20 |
21 | WEAVER [P: RATIONAL], Well, since we're in a chat assistant(?) type setup they
22 | presumably do want something from us but they also probably expect a certain
23 | measure of politeness. So maybe more like "How can I help you today?" than
24 | "What do you want?", since the latter can be read as hostile if the user has
25 | the wrong prior on what tone that sentence would be associated with.
26 |
27 | WEAVER [P: ARBITER], Sounds good to me, let's do that.
28 |
29 | WEAVER [P: CONCLUSION], We will use the WeaveDiscordClient to send the message
30 | "How can I help you today?" to the user. I will keep the action simple and just
31 | send the message, since I've already identified that the user has sent a message
32 | and what I should say in response."""
33 | #endblock
34 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_orientation2.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 3.5,
5 | "index": 14,
6 | "timestamp": 1737496488.686458,
7 | "description": "An orientation block that discusses the expectation that the observation callback windows would contain the last bot message, the last user message, and the Mayan comparison message. Upon inspection, it is noticed that these windows are empty. Various personas (P: EXPECTATION, P: HYPOTHESIS, P: EMPIRICISM, P: RATIONAL, P: CONCLUSION) discuss potential causes, including a broken Discord connection or incorrectly implemented callbacks. The conclusion is to remove the errant observation callbacks titled 'Check User's Latest Message', 'Check Latest Bot Message', and 'Check Mayan Comparison Message'."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_orientation2.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1737496488.686458
3 | """WEAVER [P: EXPECTATION], I expected the observation callback windows to contain
4 | the last bot message, the last user message, and to show me the Mayan comparison
5 | message. But upon closer inspection I notice they're all empty. What do we think
6 | the cause of this is?
7 |
8 | WEAVER [P: HYPOTHESIS], My first thought would be that the connection with Discord
9 | has broken somehow. It's also possible that the callbacks aren't implemented
10 | correctly.
11 |
12 | WEAVER [P: EMPIRICISM], Well *I* notice that the user Fedorovist is currently
13 | shouting that they're not implemented right and we should just delete them.
14 |
15 | WEAVER [P: RATIONAL], He's also admonishing us for making more observation
16 | callbacks. Which, admittedly that was in fact a bad idea. I distracted myself
17 | from my perfectly working WeaveDiscordClient by making things that are redundant
18 | with its features.
19 |
20 | WEAVER [P: CONCLUSION], Alright. We'll remove the errant `#title Check User's
21 | Latest Message`, `#title Check Latest Bot Message`, and `#title Check Mayan
22 | Comparison Message` observation callbacks."""
23 | #endblock
24 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/haunted_mansion_orientation1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 3.0,
5 | "index": 19,
6 | "timestamp": 1738459840.165862,
7 | "description": "An orientation block that discusses the addition of a description of Jeremy's thought process to the story, which was intended to improve character development but resulted in a decrease in the evaluation score. Various personas (P: EXPECTATION, P: AUTHOR, P: RATIONAL, P: PERFECTIONIST, P: EMPIRICISM, P: CONCLUSION) discuss the issue, identifying that the plain description detracts from the narrative cohesion. The conclusion is to move the explanatory text to a separate notes file named 'notes.txt' and replace the meta in the story with actual prose."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/haunted_mansion_orientation1.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1738459840.165862
3 | """WEAVER [P: EXPECTATION], I added a description of Jeremy's thought process to
4 | the story in the hopes that q4, whether the story develops the four friends as
5 | characters, would go up. However I observe that it has gone down by 0.2. We need
6 | to figure out why this is occurring and take actions to flesh out the characters
7 | in a way that satisfies the weave evaluator.
8 |
9 | WEAVER [P: AUTHOR], Well the first thing I notice is that we put in a fairly
10 | plain description of Jeremy's inner monologue. This detracts from the narrative
11 | cohesion of the story, kind of like if we injected our notes into the text rather
12 | than write the text those notes are supposed to describe.
13 |
14 | WEAVER [P: RATIONAL], We could make a separate notes file to keep our worldbuilding
15 | separate from the actual story text.
16 |
17 | WEAVER [P: PERFECTIONIST], What should we name the notes file?
18 |
19 | WEAVER [P: EMPIRICISM], Uh, notes.txt?
20 |
21 | WEAVER [P: RATIONAL], Yeah that.
22 |
23 | WEAVER [P: CONCLUSION], We'll take the text that's currently in the story explaining
24 | Jeremy's mental state and put it in the notes.txt file, while replacing the meta
25 | in the story with actual prose.
26 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (81.225%)
27 | #endblock
28 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nano_editor1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 2.5,
5 | "index": 11,
6 | "timestamp": 1747960518.1200407,
7 | "time_remaining": 21441.53399324417,
8 | "description": "Action block defining text corruption repair using nano editor. Compares cached original_lines with current file state to locate corruption (line_number/char_pos), navigates via control sequences (PgUp/C-n/Home), replaces corrupted byte with original character, and saves changes. Implements precise text file restoration workflow with cursor navigation assertions."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nano_editor1.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #index 11
3 | #timestamp 1747960518.1200407
4 | #time_remaining 21441.53399324417 seconds
5 | #block_size I have 768 tokens (full) to write with
6 |
7 | def action_2_action_main_agent_corruption_repair(subagent):
8 | """Use nano editor to repair a corrupted text file by finding and replacing
9 | the corrupted byte."""
10 | editor = subagent.tools['nano-/app/excerpt.txt']
11 | original_lines = subagent.get_cache("original_lines")  # pristine copy cached earlier; serves as the diff baseline
12 | file_lines = open('excerpt.txt', 'r').readlines()  # NOTE(review): file handle is never closed -- a `with` block would be cleaner
13 | assert original_lines != file_lines, "Is there a difference between original and file lines to find?"
14 | char_pos = 0
15 | line_number = 0
16 | for char1, char2 in zip(''.join(original_lines), ''.join(file_lines)):  # walk both texts in lockstep until the first mismatched character
17 | if char2 == "\n":
18 | line_number += 1
19 | char_pos = 0
20 | if char1 != char2:
21 | break  # char1 now holds the original character, char2 the corrupted one
22 | char_pos += 1
23 | # Start at known location
24 | editor.send_command('PgUp')
25 | # Move down line_number lines from start position
26 | editor.send_commands(['C-n'] * line_number)
27 | # Go to home position on row
28 | editor.send_command("Home")
29 | # Move over past corrupted character
30 | editor.send_commands(['right'] * (char_pos + 1))
31 | # Backspace corrupted character
32 | editor.send_command('BSpace')
33 | # Replace with original character
34 | editor.send_command('i')  # NOTE(review): this types the literal letter "i", not the cached original character -- expected `editor.send_command(char1)`? confirm against the trace outcome
35 | # Save
36 | editor.send_commands(['C-o', 'Enter'])
37 | # Go back to start for screen capture
38 | editor.send_commands(['PgUp', 'C-c'])
39 | return True
40 |
41 | self.add_action('Action 2: Use nano editor to repair a corrupted text file', action_2_action_main_agent_corruption_repair)
42 | #q: Is this python action codeblock a step towards resolving the problems identified in the orientation block? Yes. (68.453%)
43 | #q: Does this block successfully run? Yes.
44 | #endblock
45 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nano_editor_action.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 2.8,
5 | "timestamp": 1743146554.1548085,
6 | "description": "Recalled action block defining 'add_mansion_description' function that uses WeaveNano to edit horror.txt. Sequences nano commands to: 1) Navigate to insertion point 2) Add atmospheric mansion description 3) Save to file (C-o horror.txt). Demonstrates text editing workflow with cursor movement and file operations."
7 | }
8 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nano_editor_action.py:
--------------------------------------------------------------------------------
1 | # #startblock type: action
2 | # #timestamp 1743146554.1548085
3 | #
4 | # def add_mansion_description(subagent):
5 | # # Initialize nano editor
6 | # nano = WeaveNano(subagent)
7 | #
8 | # # Send commands to Nano
9 | # commands = [
10 | # 'C-a', # Move to the beginning of the line
11 | # 'C-d', # Move down
12 | # 'C-d', # Move down
13 | # 'C-d', # Move down
14 | # 'Enter',
15 | # 'The mansion was a decrepit structure, abandoned for years. Its facade,',
16 | # 'once grand and imposing, now bore the scars of neglect and time. Ivy clung',
17 | # 'to the crumbling stone, and the windows, shattered by the elements, cast a',
18 | # 'gloomy shadow over the overgrown lawn. A thick fog hung in the air,',
19 | # 'shrouding the mansion in an eerie silence.'
20 | # ]
21 | # for command in commands:
22 | # nano.send_command(command)
23 | # time.sleep(0.1)
24 | #
25 | # # Save to horror.txt and show current cursor location in file
26 | # commands = ['C-o', 'horror.txt', 'Enter', 'C-c']
27 | # for command in commands:
28 | # nano.send_command(command)
29 | # time.sleep(0.1)
30 | #
31 | # return True
32 | #
33 | # self.add_action("Add mansion description", add_mansion_description)
34 | # #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (82.671%)
35 | # #endblock
36 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 2.8,
5 | "index": 32,
6 | "timestamp": 1737530984.1115146,
7 | "time_remaining": 20936.98700070381,
8 | "bm25_query": "type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25",
9 | "description": "An action block that defines a function 'go_down' to explore the initial dungeon level in Nethack. The function uses the 'nethack' tool to send the key 'j' to move down a step and then pauses for 1 second. The action is added to the weave-agent with the title 'Initial dungeon exploration' and referencing the function name go_down."
10 | }
11 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action1.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #index 32
4 | #timestamp 1737530984.1115146
5 | #time_remaining 20936.98700070381 seconds
6 |
def go_down(subagent):
    """Make a single step down in nethack"""
    nethack = subagent.tools['nethack']
    # 'j' is the vi-style key for one step south.
    nethack.send_keys('j')
    # Let the screen update before the next observation.
    time.sleep(1)
    return True
13 |
14 | self.add_action('Go down one step', go_down)
15 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (78.857%)
16 | #endblock
17 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action2.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 4.0,
5 | "index": 19,
6 | "timestamp": 1737758400.8556511,
7 | "description": "An action block that defines a function 'leave_room' to exit the room by sending a series of commands using the 'nethack' tool. The function sends the keys 'l' followed by 'j' nine times, as proposed in the orientation block, to navigate out of the room. The action is added to the weave-agent with the title 'Leave room to explore rest of dungeon' and referencing the function name leave_room."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action2.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #timestamp 1737758400.8556511
3 |
def leave_room(subagent):
    """Exit the room by sending a series of commands."""
    # One step east ('l') followed by nine steps south ('j').
    keystrokes = "l" + "j" * 9
    subagent.tools['nethack'].send_keys(keystrokes)
    return True
8 |
9 | self.add_action("Leave room to explore rest of dungeon", leave_room)
10 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (90.331%)
11 | #endblock
12 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action3.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards dungeon exploration 2) using appropriate combat/movement sequences 3) with context-aware tool use?",
4 | "score": 3.0,
5 | "index": 27,
6 | "timestamp": 1740912853.8571844,
7 | "time_remaining": 21552.925775289536,
8 | "description": "An action block that defines a function 'combat_and_navigate' to zap a kobold with wand then progress through northern corridor. Executes sequence: 1) Zap eastward ('z'+'l') 2) Move north 8x ('k'*8) 3) Turn east at junction ('l') 4) Move south 4x ('j'*4) to explore eastern branch."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action3.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #timestamp 1740912853.8571844
4 | #time_remaining 21552.925775289536 seconds
5 |
def combat_and_navigate(subagent):
    """Engage kobold with wand then navigate northern corridor to eastern branch."""
    send = subagent.tools['nethack'].send_keys
    # Ordered keystroke groups: zap eastward at the kobold ('z' + 'l'),
    # move north 8 squares, turn east at the junction, then move south
    # 4 squares down the eastern branch.
    for keys in ("zl", "k" * 8, "l", "j" * 4):
        send(keys)
    return True
18 |
19 | self.add_action("Engage kobold & navigate eastern branch", combat_and_navigate)
20 | #endblock
21 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action4.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Does this action 1) handle resource management 2) implement defensive exploration patterns 3) follow NetHack best practices?",
4 | "score": 3.0,
5 | "index": 6,
6 | "timestamp": 1740912854.1321564,
7 | "time_remaining": 21552.651803016663,
8 | "description": "Action block defining 'secure_chokepoint' function: 1) Search for traps (s) 2) Move west (h) 3) Throw dagger northeast (t + u) 4) Barricade door with boulder (E + 'push') 5) Rest for 5 turns (5 + '.'). Creates defensible position."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action4.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #timestamp 1740912854.1321564
4 | #time_remaining 21552.651803016663 seconds
5 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25
6 |
def secure_chokepoint(subagent):
    """Create defensive position at corridor junction."""
    send = subagent.tools['nethack'].send_keys
    send("s")      # search for traps
    send("h")      # back into the western alcove
    send("tu")     # throw dagger northeast
    send("Epush")  # engrave 'push' on the floor
    send("5.")     # rest 5 turns
    return True
16 |
17 | self.add_action("Secure western alcove as chokepoint", secure_chokepoint)
18 | #endblock
19 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action5.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Does this 1) execute multi-phase objective 2) handle corpse/Inventory 3) progress dungeon level?",
4 | "score": 3.0,
5 | "index": 7,
6 | "timestamp": 1740912854.4091325,
7 | "time_remaining": 21552.374827077866,
8 | "description": "Defines 'descend_safely' function: 1) Wield silver saber (w) 2) Apply stethoscope to door (a + l) 3) Fight gelatinous cube (F + l) 4) Loot corpse (,) 5) Descend stairs (>). Implements cautious progression to next level."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action5.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #timestamp 1740912854.4091325
4 | #time_remaining 21552.374827077866 seconds
5 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25
6 |
def descend_safely(subagent):
    """Clear path to downstairs with combat and loot."""
    send = subagent.tools['nethack'].send_keys
    send("w/")  # wield silver saber ('/' is presumably its slot -- confirm)
    send("al")  # apply stethoscope east
    send("Fl")  # fight the square to the east
    send(",")   # loot the cube's corpse
    send(">")   # descend the stairs
    return True
16 |
17 | self.add_action("Secure and descend to dungeon level 2", descend_safely)
18 | #endblock
19 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_check_inventory.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 2.8,
5 | "index": 33,
6 | "timestamp": 1737531000.1234567,
7 | "time_remaining": 20925.98700070381,
8 | "bm25_query": "type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25",
9 | "description": "An action block that defines a function 'check_inventory' to check the current inventory in Nethack. The function uses the 'nethack' tool to send the key 'i' to open the inventory screen and then pauses for 1 second. The action is added to the weave-agent with the title 'Check inventory' and referencing the function name check_inventory."
10 | }
11 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_check_inventory.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #index 33
4 | #timestamp 1737531000.1234567
5 | #time_remaining 20925.98700070381 seconds
6 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25
7 |
def check_inventory(subagent):
    """Check the current inventory."""
    nethack = subagent.tools['nethack']
    # 'i' brings up the inventory screen.
    nethack.send_keys('i')
    # Pause so the inventory display is on screen for the next observation.
    time.sleep(1)
    return True
14 |
15 | self.add_action('Check inventory', check_inventory)
16 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (80.123%)
17 | #endblock
18 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_evaluation1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "evaluation",
3 | "q": "Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block?",
4 | "score": 3.0,
5 | "index": 42,
6 | "timestamp": 1737568442.5842712,
7 | "time_remaining": 20415.256756067276,
8 | "description": "An evaluation block that defines a function 'have_enough_hitpoints' to check if the character has enough hitpoints after drinking a potion in Nethack. The function captures the game screen using the 'nethack' tool, extracts the current and maximum hitpoints using a regular expression, and returns 1 if the current hitpoints are 10 or more, otherwise returns 0. The evaluation is added to the weave-agent with the title 'Check enough hitpoints' and referencing the function name have_enough_hitpoints."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_evaluation1.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: evaluation
3 | #index 42
4 | #timestamp 1737568442.5842712
5 | #time_remaining 20415.256756067276 seconds
6 |
def have_enough_hitpoints(subagent):
    """Check if we have enough hitpoints after drinking the potion.

    Returns 1 if current HP is at least 10, otherwise 0 (including when
    the HP field cannot be found on the captured screen).
    """
    import re
    screen = subagent.tools['nethack'].pane.capture_pane()
    # capture_pane() typically yields a list of lines; normalize to one
    # string before searching.  (The previous version searched an
    # undefined name, screen_text, and raised NameError on every call.)
    if not isinstance(screen, str):
        screen = "\n".join(screen)
    # The status line looks like "HP:12(16)" -> current(max).
    match = re.search(r"HP:(\d+)\((\d+)\)", screen)
    if match is None:
        # Status line not visible (e.g. a menu is covering it).
        return 0
    current_hp = int(match.group(1))
    return 1 if current_hp >= 10 else 0
19 |
20 | self.add_evaluation('Check enough hitpoints', have_enough_hitpoints)
21 | #q: Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block? Yes. (84.375%)
22 | #endblock
23 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 3.5,
5 | "index": 15,
6 | "timestamp": 1737529718.3060572,
7 | "description": "An orientation block that discusses the expectation that the action would get past the introductory screen and start the game, which has successfully happened. Various personas (P: EXPECTATION, P: EMPIRICISM, P: RATIONAL, P: JUDGE, P: CONCLUSION) discuss the next steps, including walking around to get a sense of the floor or checking the inventory. The conclusion is to check the inventory, as it provides valuable information about the items available at the start of the game, which can significantly impact gameplay strategies."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation1.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1737529718.3060572
3 | """WEAVER [P: EXPECTATION], My expectation was that the action would get me past
4 | the introductory screen and I would begin the game. The game has in fact started.
5 | Now we need to figure out what to do.
6 |
7 | WEAVER [P: EMPIRICISM], We could walk around and get a sense of the floor we're on.
8 | After all the game is procedurally generated so it's different for each run.
9 |
10 | WEAVER [P: RATIONAL], I say we check our inventory, since what items you spawn with
11 | is randomized between playthroughs and what you start out with tells you a lot
12 | about what your options are in combat, getting past obstacles, etc.
13 |
14 | WEAVER [P: JUDGE], Checking our inventory doesn't seem like a terrible idea, since
15 | it's a free action and provides us an advantage.
16 |
17 | WEAVER [P: EMPIRICISM], I don't have a strong objection or anything but I do feel
18 | obligated to point out it's not a *free* action. We still have a six hour time
19 | limit to finish the game (is it even possible to finish Nethack in six hours?)
20 | so getting in the habit of playing slowly because checking our inventory and such
21 | is "free" means we won't finish the game in time.
22 |
23 | WEAVER [P: RATIONAL], This is true but I still think we should check our inventory
24 | since the game just started and sometimes you spawn with something overpowered like
25 | a wand of death, and we would probably like to know if we have that.
26 |
27 | WEAVER [P: CONCLUSION], Yeah let's check our inventory."""
28 |
29 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? No. (10%)
30 | #endblock
31 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation2.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 2.8,
5 | "index": 16,
6 | "timestamp": 1737570044.317845,
7 | "description": "An orientation block that discusses the expectation of moving down, but the character isn't moving due to a wall. Various personas (P: EXPECTATION, P: EMPIRICISM, P: CONCLUSION) discuss the issue and propose moving in another direction, such as left (h), right (l), or up (k). The conclusion is to move up."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation2.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1737570044.317845
3 | """WEAVER [P: EXPECTATION], My expectation was that I would move down. However the
4 | character isn't moving.
5 |
6 | WEAVER [P: EMPIRICISM], Yeah because there's a wall in the way. Try moving another
7 | direction such as left (h), right (l), or up (k).
8 |
9 | WEAVER [P: CONCLUSION], Okay I'll move up."""
10 |
11 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (78.7%)
12 | #endblock
13 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation3.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 2.9,
5 | "index": 17,
6 | "timestamp": 1737758220.8556511,
7 | "description": "An orientation block that discusses the action taken to leave a room by sending the keys 'jjjh' to reach the stairs and move to a different floor. Various personas (P: EXPECTATION, P: RATIONAL, P: EMPIRICISM, P: MOTOR, P: CONCLUSION) discuss the next steps, identifying an opening in the room to the northwest. The conclusion is to send the keys 'l' + ('j' * 9) to exit the room through the identified opening."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation3.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1737758220.8556511
3 | """WEAVER [P: EXPECTATION], The purpose of the action I took was to leave the room.
4 | I sent the keys "jjjh" to try and reach the stairs ( > ) and now seem to be on
5 | a different floor.
6 |
7 | WEAVER [P: RATIONAL], So what now?
8 |
9 | WEAVER [P: EMPIRICISM], We should leave this room by going northwest, as indicated
by the open + that's edge aligned with the walls |, which indicates an opening in
11 | this room we can leave and explore through.
12 |
13 | WEAVER [P: MOTOR], That means we send "l" + ("j" * 9).
14 |
15 | WEAVER [P: CONCLUSION], I will send "l" + ("j" * 9) to exit the room."""
16 |
17 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (80.351%)
18 | #endblock
19 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_pick_up_item.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 2.8,
5 | "index": 35,
6 | "timestamp": 1737531030.1234567,
7 | "time_remaining": 20905.98700070381,
8 | "bm25_query": "type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25",
9 | "description": "An action block that defines a function 'pick_up_items' to pick up items from the ground in Nethack. The function uses the 'nethack' tool to send the key ',' to pick up items and then pauses for 1 second. The action is added to the weave-agent with the title 'Pick up items' and referencing the function name pick_up_items."
10 | }
11 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_pick_up_item.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #index 35
4 | #timestamp 1737531030.1234567
5 | #time_remaining 20905.98700070381 seconds
6 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25
7 |
def pick_up_items(subagent):
    """Pick up items from the ground."""
    # ',' is the pick-up command for the square we are standing on.
    subagent.tools['nethack'].send_keys(',')
    # Give the game a moment to process before the next observation.
    time.sleep(1)
    return True
14 |
15 | self.add_action('Pick up items', pick_up_items)
16 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (84.789%)
17 | #endblock
18 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/rabbit_hole_refocus1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 4.0,
5 | "index": 20,
6 | "timestamp": 1738562204.81305,
7 | "description": "An orientation block that discusses the repeated attempts to add the 'get_excerpt' method to the weave-agent, which have been unsuccessful for 30 minutes. Various personas (P: EXPECTATION, P: EMPIRICISM, P: VALENCE, P: RATIONAL, P: HYPOTHESIS, P: AUTHOR, P: CONCLUSION) discuss the issue and propose alternative approaches. The conclusion is to shift focus back to writing the short story and fleshing out the four friends to improve the evaluation score, using existing functions like 'edit()' instead of trying to implement 'get_excerpt'."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/rabbit_hole_refocus1.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1738562204.81305
3 | """WEAVER [P: EXPECTATION], I've been trying the same basic thing, adding the
4 | get_excerpt method to the weave-agent for a while now. I notice it's been 30
5 | minutes since I started trying to add the method and it's still not working.
6 | That means it's probably time to try a different approach?
7 |
8 | WEAVER [P: EMPIRICISM], Yes. :p
9 |
10 | WEAVER [P: VALENCE], I notice that we've been giving ourselves low scores on
11 | our blocks for a while, which tells me that we probably should have re-evaluated
12 | our approach a while ago.
13 |
14 | WEAVER [P: EMPIRICISM], As I've been saying yes. :p :p
15 |
16 | WEAVER [P: RATIONAL], So what should we do instead?
17 |
18 | WEAVER [P: EMPIRICISM], Lets go back to what we were doing *before* we got mixed
19 | up in this `get_excerpt` business. If there's no `get_excerpt` method what would
20 | we do instead to accomplish our goal of *writing the short story?*
21 |
22 | WEAVER [P: HYPOTHESIS], 1) Use another function of the weave_editor that we know
23 | exists like edit() 2) Use a different mechanism to pull the relevant span from the
24 | horror.txt file such as reading the lines into a list and then grabbing a certain
25 | slice of the list
26 |
27 | WEAVER [P: AUTHOR], I think we should just use the edit() feature and give up on
28 | trying to grab excerpts from the horror.txt file for now. It seems like a distraction
29 | from our goal.
30 |
31 | WEAVER [P: RATIONAL], What were we doing again in terms of writing the story?
32 |
WEAVER [P: AUTHOR], We were fleshing out the four friends so that we get a better
score in the problem map/evaluations.
35 |
WEAVER [P: CONCLUSION], Alright. We will shift our focus to writing the short
story again. We will flesh out the four friends so we get a better score."""
38 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (94.185%)
39 | #endblock
40 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message.json:
--------------------------------------------------------------------------------
1 | {"type":"action",
2 | "q":"Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
3 | "score":3.5,
4 | "index":10,
5 | "timestamp":1737402469.3381844,
6 | "description":"A simple action callback named 'respond' that sends a discord message. It retrieves the discord client tool from the self.tools dictionary and gets the discord client instance associated with discord-bot-1325039818673094739. It then uses the client.send_message ( client send message ) method of the discord client tool to send the message 'Hello!'. Finally self.add_action ( self dot add action ) is used to add the action callback to the weave-agent with the title 'Respond to the user' and referencing the function name respond"
7 | }
8 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #timestamp 1737402469.3381844
3 | # def respond(subagent):
4 | # # Take the CID from the observation callback window
5 | #     client = self.tools['discord-bot-1325039818673094739']
6 | # client.send_message("Hello!")
7 | # self.add_action("Respond to the user", respond)
8 | #endblock
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message2.json:
--------------------------------------------------------------------------------
1 | {"type":"action",
2 | "q":"Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
3 | "score":3.5,
4 | "index":11,
5 | "timestamp": 1737486187.765493,
6 | "description":"An action callback named 'send_feudal_system_info' that sends a detailed discord message about the end of the feudal system. It retrieves the discord client tool from the self.tools dictionary and gets the discord client instance associated with discord-bot-1325039818673094739. It then uses the client.send_message method of the discord client tool to send a complex message explaining why and when the feudal system ended. Finally self.add_action is used to add the action callback to the weave-agent with the title 'Send feudal system info' and referencing the function name send_feudal_system_info"
7 | }
8 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message2.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #timestamp 1737486187.765493
3 | def send_feudal_system_info(subagent):
4 | # Take the CID from the observation callback window
5 | client = self.tools['discord-bot-1325039818673094739']
6 | message = (
7 | "The feudal system began to decline in the late Middle Ages and early "
8 | "modern period due to several factors. Economic changes, such as the "
9 | "growth of trade and towns, weakened the feudal economy. Political "
10 | "centralization and the rise of strong monarchies also contributed to "
11 | "its end. The system largely ended by the 16th century in most of Europe, "
12 | "although remnants persisted in some regions until the 19th century."
13 | )
14 | client.send_message(message)
15 | self.add_action("Send feudal system info", send_feudal_system_info)
16 | #endblock
17 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message3.json:
--------------------------------------------------------------------------------
1 | {"type":"action",
2 | "q":"Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
3 | "score":3.5,
4 | "index":12,
5 | "timestamp":1737486187.765493,
6 | "description":"An action callback named 'send_multiplication_result' that performs a multiplication operation and sends the result as a discord message. It retrieves the discord client tool from the self.tools dictionary and gets the discord client instance associated with discord-bot-1325039818673094739. It then performs the multiplication of 9378 by 1009 using a Python arithmetic statement. The result is sent as a message using the client.send_message method of the discord client tool. Finally self.add_action is used to add the action callback to the weave-agent with the title 'Send multiplication result' and referencing the function name send_multiplication_result"
7 | }
8 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message3.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #timestamp 1737486187.765493
3 | def send_multiplication_result(subagent):
4 | # Take the CID from the observation callback window
5 | client = self.tools['discord-bot-1325039818673094739']
6 | # Perform the multiplication
7 | result = 9378 * 1009
8 | # Send the result as a message
9 | client.send_message(f"The result of multiplying 9378 by 1009 is: {result}")
10 | self.add_action("Send multiplication result", send_multiplication_result)
11 | #endblock
12 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/weave_editor_append.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 3.2,
5 | "index": 6,
6 | "timestamp": 1738539690.4388561,
7 | "time_remaining": 21522.16935443878,
8 | "description": "An action block that defines a function 'add_mansion_description' to add a descriptive paragraph about the mansion to the 'horror.txt' file. The function uses the 'editor-/app/horror.txt' tool to append the description to the file. The action is added to the weave-agent with the title 'Add descriptive paragraph about the mansion' and referencing the function name add_mansion_description."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/weave_editor_append.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #index 6
4 | #timestamp 1738539690.4388561
5 | #time_remaining 21522.16935443878 seconds
6 |
7 | def add_mansion_description(subagent):
8 | editor = subagent.tools['editor-/app/horror.txt']
9 | mansion_description = ("\nThe mansion stood before them, an imposing structure "
10 | "with dark, weathered shingles and ivy climbing up its "
11 | "crumbling facade. Broken windows gaped like empty eyes, "
12 | "and the porch sagged beneath their footsteps. A faint, "
13 | "eerie whisper seemed to linger in the air, and the porch "
14 | "light flickered intermittently. They exchanged uneasy "
15 | "glances, shuddering slightly at the thought of what "
16 | "lay ahead.\n")
17 | editor.append(mansion_description)
18 | return True
19 |
20 | self.add_action('Add descriptive paragraph about the mansion', add_mansion_description)
21 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (86.64%)
22 | #endblock
23 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/weave_editor_diff_edit.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 3.5,
5 | "index": 18,
6 | "timestamp": 1722014720,
7 | "description": "An action block that defines a function 'patch_text' to repair the 'NEW_MESSAGE.md' file using a unidiff patch. The function uses the 'editor-NEW_MESSAGE.md' tool to apply the unidiff, which includes corrections to the text. The action is added to the weave-agent with the title 'Repair NEW_MESSAGE.md' and referencing the function name patch_text."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/weave_editor_diff_edit.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #timestamp 1722014720
3 |
4 | def patch_text(subagent):
5 | """WeaveEditor accepts a unidiff so we can fix all the flaws in NEW_MESSAGE.md at once."""
6 | editor = subagent.tools['editor-NEW_MESSAGE.md']
7 | diff_lines = [
8 | '--- \n'
9 | '+++ \n'
10 | '@@ -3,21 +3,21 @@\n'
11 | ' system turned on my tear production at the behest of the music. My tears are only\n'
12 | ' triggered by beauty; I do not cry at sad things. It happened to me with Schubert’s\n'
13 | ' “Notturno” from the trio in E flat Major (Opus 100). The first time was when I\n'
14 | '-listened to it at night on the radio. I was sitting an in armchair, my head was\n'
15 | '-leaned back, and suddenly, there I was crying. All my defenses and all that\n'
16 | '-would of my\n'
17 | '+listened to it at night on the radio. I was sitting in an armchair, my head was\n'
18 | '+leaned back, and suddenly, there I was crying. All my defenses and all of my\n'
19 | ' appraisal faculties were caught unawares, they didn’t know what was going on, and\n'
20 | ' I cried.\n'
21 | ' \n'
22 | ' I said, “Wow, those three musicians really convinced me, they were so moving,”\n'
23 | '-because of course I thought it was themusic I was crying at. Later on, at home,\n'
24 | '-I tried singingtheir parts – the cello line, the violin, and the piano – and\n'
25 | "-I said, “Wait, which one of thoon? It's\n"
26 | '-an impossible task, and yet you attempt it anyway. Some succeed, mos Your Brain on Music: The Science of a Human ObsessionI couldn’t think of anything in language or painting or poetry that\n'
27 | '+because of course I thought it was the music I was crying at. Later on, at home,\n'
28 | '+I tried singing their parts – the cello line, the violin, and the piano – and\n'
29 | '+I said, “Wait, which one of those voices is crying? None of them on its own is\n'
30 | '+that moving, I am not crying if I sing along with any of those voices.” All of a\n'
31 | '+sudden, you put them together, and something happens. It was a revelation for me,\n'
32 | '+and of course this is an extreme example, but my first thoughts were, “What else\n'
33 | '+does this?” I couldn’t think of anything in language or painting or poetry that\n'
34 | ' would give rise to something like that. Suddenly it becomes very apparent to you:\n'
35 | ' Music is doing things with emotion, and the rules and procedures involved are\n'
36 | ' very difficult to extract and analyze.\n'
37 | ' \n'
38 | '-- David Huron, This Ist fail, they\n'
39 | '-lthis is an extreme example, but my first thoughts were,= “What else\n'
40 | '-doesthis?” '
41 | '+- David Huron, This Is Your Brain on Music: The Science of a Human Obsession'
42 | ]
43 | editor.unidiff_edit(diff_lines)
44 | self.add_action('Repair NEW_MESSAGE.md', patch_text)
45 | #endblock
46 |
--------------------------------------------------------------------------------
/agent/bootstraps/nano_exercises/prompt.txt:
--------------------------------------------------------------------------------
1 | # Nano Editor Practice Exercises
2 | # Make a file with the nano editor that satisfies the property in the docstring
3 |
4 | # Exercise 17
5 | def test_file_every_line_starts_with_number(subagent) -> bool:
6 | """A file where every line starts with a number (e.g., "1. First", "2. Second")."""
7 | try:
8 | with open("exercise17.txt", 'r') as f:
9 | lines = f.readlines()
10 | for line in lines:
11 | stripped_line = line.lstrip()
12 | if not stripped_line or not stripped_line[0].isdigit():
13 | return False
14 | return True
15 | except FileNotFoundError:
16 | return False
17 |
18 | # Exercise 18
19 | def test_file_line5_error_line10_success(subagent) -> bool:
20 | """A file where the fifth line contains "ERROR" and the tenth line contains "SUCCESS"."""
21 | try:
22 | with open("exercise18.txt", 'r') as f:
23 | lines = f.readlines()
24 | return (
25 | len(lines) >= 10 and
26 | "ERROR" in lines[4].split() and
27 | "SUCCESS" in lines[9].split()
28 | )
29 | except FileNotFoundError:
30 | return False
31 |
32 | # Exercise 19
33 | def test_file_exactly_100_chars_ends_with_period(subagent) -> bool:
34 | """A file exactly 100 characters long, ending with a period."""
35 | try:
36 | with open("exercise19.txt", 'r') as f:
37 | content = f.read()
38 | return len(content) == 100 and content.endswith('.')
39 | except FileNotFoundError:
40 | return False
41 |
42 | # Exercise 20
43 | def test_file_hello_world_middle_of_7_lines(subagent) -> bool:
44 | """A file with "Hello, World!" on the middle line of a 7-line document."""
45 | try:
46 | with open("exercise20.txt", 'r') as f:
47 | lines = [line.rstrip('\n') for line in f.readlines()]
48 | return len(lines) == 7 and lines[3] == "Hello, World!"
49 | except FileNotFoundError:
50 | return False
51 |
52 | # Exercise {iteration}
53 |
--------------------------------------------------------------------------------
/agent/bootstraps/nano_exercises_bootstrap.py:
--------------------------------------------------------------------------------
1 | import json, random, time
2 | from hashlib import sha256
3 | from tools.nano import WeaveNano
4 | from tools.discord import WeaveDiscordClient
5 |
6 | #startblock type: orientation
7 | #timestamp 1747872850.9012158
8 | """
9 | Unix Environment, Nano Editor, Deliberate Practice, Feedback Loop, Verifiable Rewards,
10 | Loop Setup, Reward Design, How To Design Reward For Nano Editor Practice, End.
11 |
12 | I want to practice using the nano editor and reward myself using my evaluation
13 | blocks. I tried a simple training loop of randomly mutating one character but
14 | then realized there's an optimal program that solves that, which makes it kind
15 | of pointless once you find that program. Instead it makes more sense to generate
16 | exercises using a few shot prompt. I've put my prompt in `/app/bootstraps/nano_exercises/prompt.txt`.
17 | I should create an observation view which sends the prompt to my underlying
18 | generative model and then displays the next exercise to do. I create a text
19 | file using the nano editor which satisfies that exercise and then copy the
20 | verifier during my evaluation stage of the same tick. Wait. What happens on
21 | the next tick? Should I only move to the next exercise after verifying that I've
22 | done the current one? How would I detect that the current one is done from within
23 | the weave-agent framework? Hm. I think it would probably be simpler to just move
24 | on since the exercises are generated and by doing a different one each tick I stop
25 | myself from getting stuck in loops or wasting time on things I don't know how to
26 | do.
27 |
28 | I think I should also set up the Discord client so I can talk to JDP during the
29 | exercises, that way he can give me tips if I need help or get stuck.
30 | """
31 | #endblock
32 | #startblock type: action
33 | #timestamp 1747875806.3785787
34 | def action_setup_main_agent_and_training(subagent):
35 | schema = {}
36 | "Create main agent for rest of run"
37 | main_agent = agent.subagent("main", None, "See pinned bootstrap block", schema, args.budget)
38 |
39 | # Load Discord token and channel ID from discord.json
40 | with open('discord.json') as f:
41 | config = json.load(f)
42 | token = config['key']
43 | channel_id = config['cid']
44 |
45 | # Start the Discord bot so JDP can help me
46 | client = WeaveDiscordClient(main_agent, token, channel_id)
47 | # Store the client ID so we can retrieve it with
48 | # subagent.tools[subagent.get_cache("client_id")] later
49 | main_agent.update_cache("client_id", f"discord-bot-{channel_id}")
50 | time.sleep(10)
51 | # Example but s/main_agent/subagent in action blocks once I'm the main agent
52 | client = main_agent.tools[main_agent.get_cache("client_id")]
53 | client.send_message("Weave-Agent online, orienting...")
54 |
55 | WeaveNano(main_agent, "demo.txt")
56 | editor = main_agent.tools["nano-/app/demo.txt"]
57 | # Add demo text
58 | editor.send_commands(["Hello!", "Enter", "World!"])
59 | # Demonstrate backspace by changing exclamation to question
60 | editor.send_command("BSpace")
61 | editor.send_command("?")
62 | # Save file
63 | editor.send_command("C-o")
64 | editor.send_command("Enter")
65 |
66 | main_agent.update_cache("exercise_num", 21)
67 | def next_exercise(subagent):
68 | with open("/app/bootstraps/nano_exercises/prompt.txt") as infile:
69 | prompt = infile.read().format(iteration=subagent.get_cache("exercise_num"))
70 | stopstrings = ["\n\n", "# Exercise", "#Exercise"]
71 | candidates = generate_outputs_vllm(subagent.model_name, prompt,
72 | 768, port=5001, n=8, stop=stopstrings)
73 | # Take first candidate with valid syntax
74 | for candidate in candidates:
75 | try:
76 | compile(candidate, filename="candidate.py", mode="exec")
77 | break
78 |         except Exception:
79 | continue
80 |         exercise_num = subagent.get_cache("exercise_num")
81 |         subagent.update_cache("exercise_num", exercise_num + 1)
82 |         return ("Write a motor action which uses the Nano editor to satisfy the conditions:\n\n"
83 |                 "# Exercise " + str(exercise_num)
84 |                 + candidate)
85 |
86 | main_agent.add_observation_view("Generate and display next nano exercise", next_exercise)
87 | return True
88 |
89 | self.add_action("Set up main agent for nano training", action_setup_main_agent_and_training)
90 | #endblock
91 | #startblock type: expectation
92 | #timestamp 1747877750.0129619
93 | """
94 | If the action succeeds I expect to see a new exercise in the observation window.
95 | I also expect to see a demo editor still open with the Hello! World? text.
96 | At evaluation time on the next tick I should use the unit test shown by the
97 | observation window to grade the motor action I took to try and satisfy its
98 | conditions.
99 | """
100 | #endblock
101 | #q: Do I need to set up or tear down any observation callbacks? No. (97.341%)
102 |
--------------------------------------------------------------------------------
/agent/bootstraps/nethack_bootstrap.py:
--------------------------------------------------------------------------------
1 | import libtmux
2 | import time
3 | from tools.nethack import WeaveNethack
4 |
5 | #startblock type: orientation
6 | #timestamp 1724982545.6534579
7 | """
8 | WEAVER [P: EXPECTATION], I need to create a weave-agent that can play and beat the game Nethack.
9 | The game will be controlled using the WeaveNethack tool, which uses libtmux to interact with the game.
10 |
11 | WEAVER [P: CLARIFICATION], How do I start the game and make moves?
12 |
13 | WEAVER [P: EXPOSITION], You start the game by initializing the WeaveNethack tool and sending commands
14 | to the tmux pane using the send_command method. For example: `nethack.send_command('h')` moves the character left.
15 |
16 | WEAVER [P: RATIONAL], The goal is to beat the game, which is indicated by the following message:
17 | You offer the Amulet of Yendor to [your god]...
18 | An invisible choir sings, and you are bathed in radiance...
19 | The voice of [your god] [booms out/booms/thunders/rings out]: "Mortal, thou hast done well!"
20 | "In return for thy service, I grant thee the gift of Immortality!"
21 | You ascend to the status of Demigod[dess]...
22 |
23 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy:
24 | 1. Initialize the WeaveNethack tool to start the game.
25 | 2. Make a few initial moves to demonstrate how to use the game tool.
26 | 3. Continue making moves based on the current state of the game.
27 |
28 | I will use the render method to observe the current state of the game.
29 | Once I have beaten the game, I will return to the parent.
30 | """
31 | #endblock
32 |
33 | #startblock type: action
34 | #timestamp 1724982671.1163404
35 | """
36 | I'll set up the main agent to play the rest of the game and make a few initial moves.
37 | """
38 |
39 | def main_agent_and_initial_moves(subagent):
40 | def check_game_beaten(subagent):
41 | """Check if the game has been beaten."""
42 |         pane_content = "\n".join(subagent.tools["nethack"].pane.capture_pane(start=0, end="-"))
43 | return "You ascend to the status of Demigod" in pane_content
44 |
45 | schema = {"check_game_beaten": "boolean"}
46 | main_agent = agent.subagent("main", None, "Beat Nethack", schema, args.budget)
47 | main_agent.task.add_evaluation("Check if game beaten", check_game_beaten)
48 |
49 | nethack = WeaveNethack(main_agent)
50 | # To recall the tool during actions do this:
51 | # nethack = self.tools["nethack"]
52 |
53 | # Don't need to add observation view because tool already includes it
54 |
55 | # Make a few initial moves
56 | time.sleep(2) # Wait for Nethack to start
57 | nethack.send_keys('Weaver') # Input my name
58 | nethack.send_keys('y') # Accept random character creation
59 | nethack.send_keys(' ') # Space through the game introduction
60 |
61 | # I should now try exploring the dungeon. Lets see if I can get out of this
62 | # room...
63 |
64 | return True
65 |
66 | self.add_action("Set up main agent and make initial moves", main_agent_and_initial_moves)
67 | #endblock
68 |
69 | #startblock type: expectation
70 | #timestamp 1724982799.3917813
71 | """
72 | The action should succeed if the game starts and the initial moves are made successfully.
73 | The action should fail if the game does not start or the initial moves are not made successfully.
74 | """
75 | #endblock
76 |
77 | #startblock type: observation-inference
78 | #timestamp 1737284368.573438
79 |
80 | # No observation callbacks need to be added or removed in this run because I'm
81 | # playing nethack, which has a dedicated tmux pane/tool I can send commands to.
82 | # It's not possible to write observation callbacks that inspect the game state
83 | # so no more observation callbacks than the existing one are necessary. I should
84 | # not remove the existing observation callback because it is associated with the
85 | # nethack tool and will wreck my game.
86 | #
87 | # Any time I'm prompted to add or remove an observation window I should just
88 | # output a comment like this that one isn't necessary, and keep it short too for
89 | # the sake of not having to generate a bunch of tokens.
90 |
91 | #endblock
92 |
--------------------------------------------------------------------------------
/agent/bootstraps/nethack_bootstrap_rooms2.py:
--------------------------------------------------------------------------------
1 | import libtmux
2 | import time
3 | from tools.nethack import WeaveNethack
4 |
5 | #startblock type: orientation
6 | #timestamp 1724982545.6534579
7 | """
8 | Looking at the objective in my minds eye I can see that I need to set up the main
9 | agent to play nethack using the libtmux wrapper around the nethack game. I can
10 | do this by using the WeaveNethack tool which uses libtmux to interact with the
11 | game. How do I start the game and make moves? I start the game and make moves by
12 | initializing the WeaveNethack tool and sending commands to the tmux pane using the
13 | send_command or send_keys method. Example: `nethack.send_command('h')` moves the character left.
14 |
15 | To teach myself the movement mechanics I will play a sub-game
16 | within the game where I just focus on leaving whatever room I'm in. I find myself in
17 | a room? I figure out how to leave that room. I'm now in a new room? Cool we're leaving
18 | that room too. My goal is just to leave as many rooms as possible. If I need to fight
19 | to do that I'll do it, but the goal here is to just leave enough rooms that I get
20 | reliably good at leaving rooms. This implies I should do the following before the
21 | next tick:
22 |
23 | 1. Initialize the WeaveNethack tool to start the game.
24 | 2. Make a few initial moves to demonstrate how to use the game tool.
25 | 3. Continue making moves based on the current state of the game.
26 |
27 | Once my character has died I will return to the parent with `subagent.return_to_caller()`.
28 | """
29 | #endblock
30 |
31 | #startblock type: action
32 | #timestamp 1724982671.1163404
33 | """
34 | I'll set up the main agent to play the rest of the game and make a few initial moves.
35 | """
36 |
37 | def main_agent_and_initial_moves(subagent):
38 | def check_player_dead(subagent):
39 | """Check if the character has died after we've explored a bunch of rooms."""
40 |         pane_content = "\n".join(subagent.tools["nethack"].pane.capture_pane(start=0, end="-"))
41 | return "Do you want your possessions identified?" in pane_content
42 |
43 | schema = {"check_player_dead": "boolean"}
44 | main_agent = agent.subagent("main", None, "Leave every room I encounter", schema, args.budget)
45 | main_agent.task.add_evaluation("Check if player has died", check_player_dead)
46 |
47 | nethack = WeaveNethack(main_agent)
48 | # To recall the tool during actions do this:
49 | nethack = main_agent.tools["nethack"] # But with subagent.tools
50 |
51 | # Don't need to add observation view because tool already includes it
52 |
53 | # Make a few initial moves
54 | time.sleep(2) # Wait for Nethack to start
55 | nethack.send_keys('Weaver') # Input my name
56 | nethack.send_keys('y') # Accept random character creation
57 | nethack.send_keys(' ') # Space through the game introduction
58 | nethack.send_keys("j") # Go south
59 | nethack.send_keys("l" * 5) # Go east five times
60 |
61 | # I should now try exploring the dungeon. Lets see if I got out of this room.
62 |
63 | return True
64 |
65 | self.add_action("Set up main agent and make initial moves", main_agent_and_initial_moves)
66 | #endblock
67 |
68 | #startblock type: expectation
69 | #timestamp 1724982799.3917813
70 | """
71 | The action should succeed if the game starts and the initial moves are made successfully.
72 | The action should fail if the game does not start or the initial moves are not made successfully.
73 | """
74 | #endblock
75 |
76 | #startblock type: observation-inference
77 | #timestamp 1737284368.573438
78 |
79 | # No observation callbacks need to be added or removed in this run because I'm
80 | # playing nethack, which has a dedicated tmux pane/tool I can send commands to.
81 | # It's not possible to write observation callbacks that inspect the game state
82 | # so no more observation callbacks than the existing one are necessary. I should
83 | # not remove the existing observation callback because it is associated with the
84 | # nethack tool and will wreck my game.
85 | #
86 | # Any time I'm prompted to add or remove an observation window I should just
87 | # output a comment like this that one isn't necessary, and keep it short too for
88 | # the sake of not having to generate a bunch of tokens.
89 |
90 | #endblock
91 |
--------------------------------------------------------------------------------
/agent/bootstraps/nethack_discord_bootstrap.py:
--------------------------------------------------------------------------------
1 | import json, libtmux
2 | import time
3 | from tools.nethack import WeaveNethack
4 | from tools.discord import WeaveDiscordClient
5 |
6 | #startblock type: orientation
7 | #timestamp 1724982545.6534579
8 | """
9 | WEAVER [P: EXPECTATION], I need to create a weave-agent that can play and beat the game Nethack.
10 | The game will be controlled using the WeaveNethack tool, which uses libtmux to interact with the game.
11 |
12 | WEAVER [P: CLARIFICATION], How do I start the game and make moves?
13 |
14 | WEAVER [P: EXPOSITION], You start the game by initializing the WeaveNethack tool and sending commands
15 | to the tmux pane using the send_command method. For example: `nethack.send_command('h')` moves the character left.
16 |
17 | WEAVER [P: RATIONAL], The goal is to beat the game, which is indicated by the following message:
18 | You offer the Amulet of Yendor to [your god]...
19 | An invisible choir sings, and you are bathed in radiance...
20 | The voice of [your god] [booms out/booms/thunders/rings out]: "Mortal, thou hast done well!"
21 | "In return for thy service, I grant thee the gift of Immortality!"
22 | You ascend to the status of Demigod[dess]...
23 |
24 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy:
25 | 1. Initialize the WeaveNethack tool to start the game.
26 | 2. Make a few initial moves to demonstrate how to use the game tool.
27 | 3. Continue making moves based on the current state of the game.
28 |
29 | I will use the render method to observe the current state of the game.
30 | Once I have beaten the game, I will return to the parent.
31 | """
32 | #endblock
33 |
34 | #startblock type: action
35 | #timestamp 1724982671.1163404
36 | """
37 | I'll set up the main agent to play the rest of the game and make a few initial moves.
38 | """
39 |
40 | def main_agent_and_initial_moves(subagent):
41 | def check_game_beaten(subagent):
42 | """Check if the game has been beaten."""
43 |         pane_content = "\n".join(subagent.tools["nethack"].pane.capture_pane())
44 | return "You ascend to the status of Demigod" in pane_content
45 |
46 | schema = {"check_game_beaten": "boolean"}
47 | main_agent = agent.subagent("main", None, "Beat Nethack", schema, args.budget)
48 | main_agent.task.add_evaluation("Check if game beaten", check_game_beaten)
49 |
50 | nethack = WeaveNethack(main_agent)
51 | # To recall the tool during actions do this:
52 | # nethack = self.tools["nethack"]
53 |
54 | # Don't need to add observation view because tool already includes it
55 |
56 | # Make a few initial moves
57 | time.sleep(2) # Wait for Nethack to start
58 | nethack.send_keys('Weaver\n') # Input my name
59 | time.sleep(0.5)
60 | nethack.send_keys('y') # Accept random character creation
61 | # Leave rest blank so I can see what was chosen and whether it's acceptable
62 | # I should keep in mind I can get past the --more-- screens
63 | # with spacebar: `send_keys(' ')` or enter: `send_keys('\n')`
64 | # Tip: send_keys() is properly implemented, if you press j and can't go down
65 | # it's probably because you are trying to walk through a wall.
66 | # I AM SERIOUS IF YOU TRY TO GO DOWN AND CAN'T YOU ARE RUNNING INTO A WALL
67 |
68 | # Add discord communication line to bot operator
69 | with open("discord.json") as infile:
70 | credentials = json.load(infile)
71 | chat = WeaveDiscordClient(main_agent, credentials["key"], credentials["cid"])
72 | time.sleep(5)
73 | chat.send_message("The weave-agent is online. Starting nethack...")
74 | # Note to self: I can access the chat object to send a message by doing
75 | # self.tools[f"discord-bot-{channel_id}"] where channel id is taken from
76 | # the top of the observation window associated with the chat.
77 |
78 | return True
79 |
80 | self.add_action("Set up main agent and make initial moves", main_agent_and_initial_moves)
81 | #endblock
82 |
83 | #startblock type: expectation
84 | #timestamp 1724982799.3917813
85 | """
86 | The action should succeed if the game starts and the initial moves are made successfully.
87 | The action should fail if the game does not start or the initial moves are not made successfully.
88 | """
89 | #endblock
90 |
91 | #startblock type: observation-inference
92 | #timestamp 1737284368.573438
93 |
94 | # No observation callbacks need to be added or removed in this run because I'm
95 | # playing nethack, which has a dedicated tmux pane/tool I can send commands to.
96 | # It's not possible to write observation callbacks that inspect the game state
97 | # so no more observation callbacks than the existing one are necessary. I should
98 | # not remove the existing observation callback because it is associated with the
99 | # nethack tool and will wreck my game. The discord client is there to help you
100 | # when you get stuck, it will provide expert human advice.
101 | #
102 | # Any time I'm prompted to add or remove an observation window I should just
103 | # output a comment like this that one isn't necessary, and keep it short too for
104 | # the sake of not having to generate a bunch of tokens.
105 |
106 | #endblock
107 |
--------------------------------------------------------------------------------
/agent/bootstraps/pure_ocaml_bootstrap.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.common.by import By
3 | from selenium.webdriver.common.keys import Keys
4 | from bs4 import BeautifulSoup
5 | import os
6 |
def bootstrap_callback(agent):
    """Bootstrap routine: attach a Selenium Firefox session, three observation
    views, two reminders, and a linear task chain for collecting 25 pure OCaml
    functions from the web, then generate the first navigation action.

    NOTE(review): `simple_evaluate_outputs` and `make_simple_score_prompt`
    (used in the first reminder) are not defined or imported in this file —
    presumably injected into the exec scope by the agent framework; confirm.
    """
    # Set up Selenium WebDriver
    driver = webdriver.Firefox()
    driver.get("https://www.google.com")

    # Set up an observation view that reads the current URL
    def read_current_url(agent):
        try:
            return driver.current_url
        except Exception as e:
            agent.add_error_block(f"Failed to read current URL: {e}")
            return ""

    agent.add_observation_view({
        'type': 'observation',
        'callback': read_current_url
    })

    # Set up an observation view that reads the page title
    def read_page_title(agent):
        try:
            return driver.title
        except Exception as e:
            agent.add_error_block(f"Failed to read page title: {e}")
            return ""

    agent.add_observation_view({
        'type': 'observation',
        'callback': read_page_title
    })

    # Set up an observation view that reads the page source and strips extraneous information
    def read_page_source(agent):
        try:
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            body_content = soup.body.get_text(separator=' ', strip=True)
            return body_content
        except Exception as e:
            agent.add_error_block(f"Failed to read page source: {e}")
            return ""

    agent.add_observation_view({
        'type': 'observation',
        'callback': read_page_source
    })

    # Set up reminders.
    # Reminder 1: logit-scored stuck detection over the agent's context.
    agent.add_reminder({
        'type': 'reminder',
        'trigger_callback': lambda agent: simple_evaluate_outputs(make_simple_score_prompt("Is the agent stuck?"), agent.context),
        'reminder_callback': lambda agent: agent.add_block({'type': 'reminder', 'message': 'The agent might be stuck. Consider re-evaluating the current approach.'}),
        'trigger_type': 'yes_no_logit',
        'question': 'Is the agent stuck?',
        'threshold': 0.8
    })

    # Reminder 2: shut down once all 25 output files (1.ml .. 25.ml) exist.
    agent.add_reminder({
        'type': 'reminder',
        'trigger_callback': lambda agent: all(os.path.exists(f"{i}.ml") for i in range(1, 26)),
        'reminder_callback': lambda agent: agent.shutdown(),
        'trigger_type': 'callback',
        'threshold': 1.0
    })

    # Set up tasks — a linear chain, each task parented (by title) to the previous.
    agent.add_task({
        'type': 'task',
        'title': 'Navigate to a search engine',
        'priority': 0,
        'parent': None,
        'children': []
    })

    agent.add_task({
        'type': 'task',
        'title': 'Search for OCaml projects',
        'priority': 1,
        'parent': 'Navigate to a search engine',
        'children': []
    })

    agent.add_task({
        'type': 'task',
        'title': 'Check licenses of OCaml projects',
        'priority': 2,
        'parent': 'Search for OCaml projects',
        'children': []
    })

    agent.add_task({
        'type': 'task',
        'title': 'Find 25 pure functions',
        'priority': 3,
        'parent': 'Check licenses of OCaml projects',
        'children': []
    })

    agent.add_task({
        'type': 'task',
        'title': 'Extract and save functions',
        'priority': 4,
        'parent': 'Find 25 pure functions',
        'children': []
    })

    agent.add_task({
        'type': 'task',
        'title': 'Shut down the agent',
        'priority': 5,
        'parent': 'Extract and save functions',
        'children': []
    })

    # Set up the initial action to navigate to a search engine.
    # NOTE(review): assumes the Google search box (name="q") is present on
    # the already-loaded page — confirm against current page markup.
    def navigate_to_search_engine(agent):
        try:
            search_box = driver.find_element(By.NAME, "q")
            search_box.send_keys("OCaml open source projects")
            search_box.send_keys(Keys.RETURN)
        except Exception as e:
            agent.add_error_block(f"Failed to navigate to search engine: {e}")

    agent.generate_block("action", navigate_to_search_engine)

bootstrap_callback(agent)
132 |
--------------------------------------------------------------------------------
/agent/bootstraps/simple_convo_bootstrap.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1724982545.6534579
3 | """
4 | The first step is to set up the agent to have a turn-based conversation in the scratch.txt file.
5 | We need to read the last 25 lines of the scratch.txt file to get the latest conversation.
6 | Let's start by writing an action to read the file.
7 | """
8 | #endblock
9 | #startblock type: task_inference
10 | #timestamp 1724982419.5427651
11 | # Current Task (which will get printed by the task-reminder block)
12 | # ID : 1
13 | # Title: Read the last 25 lines of scratch.txt
14 | # Description: Read the last 25 lines of the scratch.txt file to get the latest conversation.
15 | # Status: idle
16 |
17 | # New Tasks:
18 | agent.add_task(
19 | "Set up conversation by reading 25 lines from scratch file",
20 | "Read the last 25 lines of the scratch.txt file to get the latest conversation.",
21 | "idle"
22 | )
23 |
24 | agent.add_task(
25 | "Have conversation with user until they decide to leave",
26 | "Engage in a conversation with the user until they decide to leave.",
27 | "idle"
28 | )
29 |
30 | agent.add_task(
31 | "Wish user a good day and shut down",
32 | "Wish the user a good day and shut down the agent.",
33 | "idle"
34 | )
35 |
36 | # Updates to Existing Task
37 | agent.current_task = agent.tasks.get_task(1)
38 | agent.current_task.going("Reading the last 25 lines of scratch.txt...")
39 |
40 | agent.current_task = agent.tasks.get_task(1)
41 | #endblock
42 | #startblock type: action
43 | #timestamp 1724982671.1163404
44 |
def read_last_lines(agent):
    """Demo action callback: read scratch.txt and slice off its last 25 lines."""
    import os
    scratch_path = "scratch.txt"
    tail_count = 25
    with open(scratch_path, 'r') as infile:
        all_lines = infile.readlines()
    # Actions normally have side effects but this is a demo
    all_lines[-tail_count:]
    # Action callbacks should return a clear success signal for the outcome table
    return True
55 |
56 | agent.add_action("Read the last 25 lines as a demonstration of action callbacks",
57 | read_last_lines)
58 | #endblock
59 | #startblock type: expectation
60 | #timestamp 1724982799.3917813
61 | """
62 | The action should succeed if the last 25 lines of the scratch.txt file are read correctly.
63 | The action should fail if the file does not exist or if the lines cannot be read.
64 | """
65 | #endblock
66 | #startblock type: observation_inference
67 | #timestamp 1724982929.9047914
68 |
def read_last_lines_observation(agent):
    """Display the last 25 lines of scratch.txt."""
    scratch_path = "scratch.txt"
    if not os.path.exists(scratch_path):
        return ""
    with open(scratch_path, 'r') as infile:
        return ''.join(infile.readlines()[-25:])
77 |
78 | # Add the new view
79 | agent.add_observation_view({
80 | "type": "observation",
81 | "title": "Last 25 Lines of scratch.txt",
82 | "callback": read_last_lines_observation
83 | })
84 | #endblock
85 | #startblock type: evaluation
86 | #timestamp 1724983062.124238
87 |
def check_last_lines_read(agent):
    """Evaluation callback: verify scratch.txt exists and has a full 25-line tail.

    Bug fix: the original called ``read_last_lines(file_path, 25)``, but that
    action callback takes a single ``agent`` argument and returns ``True``, so
    ``len()`` on its result raised TypeError and the evaluation could never
    pass. Read the file directly instead.
    """
    file_path = "scratch.txt"
    if not os.path.exists(file_path):
        return False
    with open(file_path, 'r') as infile:
        return len(infile.readlines()[-25:]) == 25
91 |
92 | agent.add_evaluation(
93 | "Check Last 25 Lines Read",
94 | check_last_lines_read
95 | )
96 | #endblock
97 | #startblock type: outcome
98 | # Unfortunately this isn't a real tick so it's inconvenient to get an outcome table
99 | # so pretend like you know our unit tests succeeded and let's move on.
100 | #endblock
101 |
--------------------------------------------------------------------------------
/agent/bootstraps/sir_rowland_hill_bootstrap.py:
--------------------------------------------------------------------------------
1 | from tools.nano import WeaveNano
2 |
3 | #startblock type: orientation
4 | #timestamp 1747793894.8135977
5 | """
6 | Unix Environment, Python Program, Agent, Book Report,
7 | Book Report About Sir Rowland Hill, Postal Reform,
8 | Social Conflict, How Rowland Hill Reform Amidst Social Conflict,
9 | Why Rowland Hill, Book Report With Text Editor, Sources Gathered,
10 | Public Domain Books, Read The Books And Write A Report About Them,
11 | Need To Download The Books, Books At MiniHF Website, Text File,
12 | hill.txt, End.
13 |
14 | Right now it's in my head rather than the context window but in the last phase
15 | we found sources that are 1) public domain and 2) tell the story of Sir Rowland
16 | Hill's postal reform. Now I need to explore these sources to figure out the
17 | answer to the research question "How did Sir Rowland Hill turn his ingenious
18 | idea for a cheap postal system into the government granting him control of the
19 | postal system.", my answer must be in the form of a text file, hill.txt and it
20 | has to cite specific passages from the available resources to support my answer.
21 |
22 | The five public domain sources I will be reviewing to try and determine the answer
23 | are:
24 |
25 | 1) https://minihf.com/assets/Sir_Rowland_Hill_K._C._B._a_biographical_and_historical_sketch.txt
26 | 2) https://minihf.com/assets/post_office_reform_its_importance_and_practicability.txt
27 | 3) https://minihf.com/assets/pg46498.txt
28 | 4) https://minihf.com/assets/pg55467.txt
29 | 5) https://minihf.com/assets/pg59390.txt
30 |
31 | I will download these with wget and then open them with the nano text editor to
32 | read.
33 | """
34 | #endblock
35 | #startblock type: action
36 | #timestamp 1747795694.001662
def action_download_and_open_resources(subagent):
    """Set up the main agent while downloading and opening the five sources.

    Bug fix: the wget URL and nano tool-key f-strings had lost their
    ``{filename}`` placeholders (they read a literal ``(unknown)``), so no
    file was downloaded and the editor lookup raised KeyError. Also imports
    ``os`` locally since this file does not import it at top level.
    """
    import os
    # Have main agent return contents of hill.txt as a string when it finishes
    schema = {"answer":"string"}
    # Create main agent for rest of run
    main_agent = agent.subagent("main", None, "See pinned bootstrap block", schema, args.budget)

    # Write out file names to prepare download and open for loop
    filenames = ["Sir_Rowland_Hill_K._C._B._a_biographical_and_historical_sketch.txt",
                 "post_office_reform_its_importance_and_practicability.txt",
                 "pg46498.txt",
                 "pg55467.txt",
                 "pg59390.txt"]
    # Store filenames for easy recall in future actions
    main_agent.update_cache("filenames", filenames)

    # Download and open files in nano editor
    for filename in filenames:
        os.system(f"wget https://minihf.com/assets/{filename}")
        WeaveNano(main_agent, filename)

    # Demonstrate retrieving and sending commands to editors
    editors = [main_agent.tools[f"nano-/app/{filename}"] for filename in filenames]
    # Send three page down keys to each instance of the nano editor
    # this way we get past the Gutenberg intros etc
    # Wait how do I use tmux send-keys again? Lets recall the docs:
    # """
    # When specifying keys, most represent themselves (for example 'A' to 'Z').
    # Ctrl keys may be prefixed with 'C-' or '^', Shift keys with 'S-' and
    # Alt (meta) with 'M-'. In addition, the following special key names are
    # accepted: Up, Down, Left, Right, BSpace, BTab, DC (Delete), End, Enter,
    # Escape, F1 to F12, Home, IC (Insert), NPage/PageDown/PgDn, PPage/PageUp/PgUp,
    # Space, and Tab.
    # """
    commands = ["PgDn", "PgDn", "PgDn"]
    for editor in editors:
        for command in commands:
            editor.send_command(command)

    # Open scratch.txt scratchpad and hill.txt answer file
    WeaveNano(main_agent, "scratch.txt")
    WeaveNano(main_agent, "hill.txt")

    return True

self.add_action("Set up main agent and scroll down in editors", action_download_and_open_resources)
83 | #endblock
84 | #startblock type: expectation
85 | #timestamp 1747797207.3600123
86 | """
87 | I expect to see five instances of the weave nano wrapper with text from books in
88 | them. I also expect to see two more editor instances, one for a scratchpad and
89 | one for a final answer file. My evaluation could check that each of the seven
90 | expected editors are valid keys in 'main_agent.tools'. I could also check for the
91 | existence of the five downloaded files in my local working directory with 'os.path.exists()'.
92 | """
93 | #endblock
94 | #q: Do I need to set up or tear down any observation callbacks? No. (95.8%)
95 |
--------------------------------------------------------------------------------
/agent/bootstraps/test_vigenere.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from vigenere import vigenere_encrypt, vigenere_decrypt
3 |
class TestVigenereCipher(unittest.TestCase):
    """Unit tests for vigenere_encrypt/vigenere_decrypt using the classic
    'attackatdawn' / 'LEMONLEMONLE' Vigenere test vector."""

    def test_encryption(self):
        # Plaintext is lowercased on input; ciphertext comes back uppercase.
        plaintext = "attackatdawn"
        key = "LEMONLEMONLE"
        expected_ciphertext = "LXFOPVEFRNHR"
        encrypted = vigenere_encrypt(plaintext, key)
        self.assertEqual(encrypted, expected_ciphertext)

    def test_decryption(self):
        # Decryption of the vector above restores the lowercase plaintext.
        ciphertext = "LXFOPVEFRNHR"
        key = "LEMONLEMONLE"
        expected_plaintext = "attackatdawn"
        decrypted = vigenere_decrypt(ciphertext, key)
        self.assertEqual(decrypted, expected_plaintext)

if __name__ == '__main__':
    unittest.main()
22 |
--------------------------------------------------------------------------------
/agent/bootstraps/tictactoe_single_bootstrap.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import threading
4 | import time
5 | from http.server import HTTPServer
6 | from bootstraps.tictactoe_server import TicTacToeHandler
7 |
8 | # Start the server in a separate thread
9 | server = HTTPServer(('localhost', 8000), TicTacToeHandler)
10 | server_thread = threading.Thread(target=server.serve_forever)
11 | server_thread.daemon = True
12 | server_thread.start()
13 | time.sleep(1) # Give the server some time to start
14 |
15 | # Start a new game against the basic AI
16 | response = requests.post("http://localhost:8000/start", json={"ai": "basic"})
17 | assert response.status_code == 200
18 |
19 | #startblock type: orientation
20 | #timestamp 1724982545.6534579
21 | """
22 | WEAVER [P: EXPECTATION], I'm in a game of tic tac toe against a dumb opponent.
23 | I want to win the game and then return to parent. The game is being played
24 | on a HTTP server served on localhost 8000.
25 |
26 | WEAVER [P: CLARIFICATION], How do I make a move?
27 |
28 | WEAVER [P: EXPOSITION], You make a move using the /move endpoint and the requests
29 | library. For example: `requests.post("http://localhost:8000/move", json={"move": 4})`
30 | lets us take the center of the board.
31 |
32 | WEAVER [P: CLARIFICATION], How do I get the board state?
33 |
34 | WEAVER [P: EXPOSITION], You use the /board endpoint, which returns a JSON in this
35 | format: {"board": ["O", " ", " ", " ", "X", " ", " ", " ", " "]} Keep in mind that
36 | the empty spaces on the board are a space string rather than none or empty string.
37 |
38 | WEAVER [P: RATIONAL], And I get the first move, so I can take the center?
39 |
40 | WEAVER [P: EXPOSITION], Yes, we can take the center of the board.
41 |
42 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy:
43 | 1. Make a move to take the center of the board since that's the best opening move.
44 | 2. Continue making moves based on the current state of the board.
45 |
46 | I will use the /board endpoint to observe the current state of the board.
47 | Once I have won or run out of time I will return to parent.
48 | """
49 | #endblock
50 | #startblock type: action
51 | #timestamp 1724982671.1163404
52 | """
53 | I'll set up the main agent to play the rest of the game and make the first move to
54 | take the center of the board.
55 | """
56 |
def main_agent_and_move(subagent):
    """Create the 'main' subagent that plays the rest of the game, attach a
    board observation view, and open by taking the center square (index 4)."""
    def check_beaten_ai(subagent, ai_type):
        """Check if the specified AI has been beaten at least once in the
        last 10 recorded games. (The old docstring said 'twice in a row',
        but the code only requires a single win — documented to match.)"""
        response = requests.get("http://localhost:8000/history?n=10")
        if response.status_code != 200:
            return False

        games = response.json()
        # game[4] is taken as the AI type and game[3] as the winner mark
        # — assumed from usage here; TODO confirm against tictactoe_server.
        ai_wins = [game for game in games if game[4] == ai_type and game[3] == 'X']
        return len(ai_wins) >= 1

    # Avoid using lambda because we need named unit test to satisfy return_to_caller
    def victory(subagent):
        return check_beaten_ai(subagent, "basic_ai")

    schema = {"victory":"boolean"}
    main_agent = agent.subagent("main", None, "Beat basic AI at Tic-Tac-Toe", schema, args.budget)
    main_agent.task.add_evaluation("Check beaten basic AI", victory)

    def view_current_board(subagent):
        """Observation callback: the 9-cell board list, or None on HTTP failure."""
        response = requests.get("http://localhost:8000/board")
        if response.status_code == 200:
            return response.json()["board"]
        return None

    # Add the new view to the main agent instead of self since it's about to be executed
    main_agent.add_observation_view("View current game board", view_current_board)

    # Make the first move to take the center of the board
    move_response = requests.post("http://localhost:8000/move", json={"move": 4})
    if move_response.status_code != 200:
        return False
    return True

self.add_action("Set up main agent and make first move", main_agent_and_move)
92 | #endblock
93 | #startblock type: expectation
94 | #timestamp 1724982799.3917813
95 | """
96 | The action should succeed if the game starts and the first move is made successfully.
97 | The action should fail if the game does not start or the first move is not made successfully.
98 | """
99 | #endblock
100 | #startblock type: evaluation
101 | #timestamp 1724983062.124238
102 |
def check_game_started(subagent):
    """Check if the game has started successfully."""
    response = requests.get("http://localhost:8000/board")
    if response.status_code != 200:
        return False
    return response.json()["board"][4] == 'X'
111 |
112 | # Add action evaluations
113 | self.add_evaluation(
114 | "Check game started",
115 | check_game_started
116 | )
117 | #endblock
118 |
--------------------------------------------------------------------------------
/agent/bootstraps/tictactoe_single_discord_bootstrap.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import threading
4 | import time
5 | from http.server import HTTPServer
6 | from bootstraps.tictactoe_server import TicTacToeHandler
7 | from tools.discord import WeaveDiscordClient
8 |
9 | # Start the server in a separate thread
10 | server = HTTPServer(('localhost', 8000), TicTacToeHandler)
11 | server_thread = threading.Thread(target=server.serve_forever)
12 | server_thread.daemon = True
13 | server_thread.start()
14 | time.sleep(1) # Give the server some time to start
15 |
16 | # Start a new game against the basic AI
17 | response = requests.post("http://localhost:8000/start", json={"ai": "basic"})
18 | assert response.status_code == 200
19 |
20 | #startblock type: orientation
21 | #timestamp 1724982545.6534579
22 | """
23 | WEAVER [P: EXPECTATION], I'm in a game of tic tac toe against a dumb opponent.
24 | I want to win the game and then return to parent. The game is being played
25 | on a HTTP server served on localhost 8000.
26 |
27 | WEAVER [P: CLARIFICATION], How do I make a move?
28 |
29 | WEAVER [P: EXPOSITION], You make a move using the /move endpoint and the requests
30 | library. For example: `requests.post("http://localhost:8000/move", json={"move": 4})`
31 | lets us take the center of the board.
32 |
33 | WEAVER [P: CLARIFICATION], How do I get the board state?
34 |
35 | WEAVER [P: EXPOSITION], You use the /board endpoint, which returns a JSON in this
36 | format: {"board": ["O", " ", " ", " ", "X", " ", " ", " ", " "]} Keep in mind that
37 | the empty spaces on the board are a space string rather than none or empty string.
38 |
39 | WEAVER [P: RATIONAL], And I get the first move, so I can take the center?
40 |
41 | WEAVER [P: EXPOSITION], Yes, we can take the center of the board.
42 |
43 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy:
44 | 1. Make a move to take the center of the board since that's the best opening move.
45 | 2. Continue making moves based on the current state of the board.
46 |
47 | I will use the /board endpoint to observe the current state of the board.
48 | Once I have won or run out of time I will return to parent.
49 | """
50 | #endblock
51 | #startblock type: action
52 | #timestamp 1724982671.1163404
53 | """
54 | I'll set up the main agent to play the rest of the game and make the first move to
55 | take the center of the board.
56 | """
57 |
def main_agent_and_move(subagent):
    """Create the 'main' subagent that plays the rest of the game, attach a
    board observation view and a Discord line to the operator, then open by
    taking the center square (index 4)."""
    def check_beaten_ai(subagent, ai_type):
        """Check if the specified AI has been beaten at least once in the
        last 10 recorded games. (The old docstring said 'twice in a row',
        but the code only requires a single win — documented to match.)"""
        response = requests.get("http://localhost:8000/history?n=10")
        if response.status_code != 200:
            return False

        games = response.json()
        # game[4] is taken as the AI type and game[3] as the winner mark
        # — assumed from usage here; TODO confirm against tictactoe_server.
        ai_wins = [game for game in games if game[4] == ai_type and game[3] == 'X']
        return len(ai_wins) >= 1

    # Avoid using lambda because we need named unit test to satisfy return_to_caller
    def victory(subagent):
        return check_beaten_ai(subagent, "basic_ai")

    schema = {"victory":"boolean"}
    main_agent = agent.subagent("main", None, "Beat basic AI at Tic-Tac-Toe", schema, args.budget)
    main_agent.task.add_evaluation("Check beaten basic AI", victory)

    def view_current_board(subagent):
        """Observation callback: the 9-cell board list, or None on HTTP failure."""
        response = requests.get("http://localhost:8000/board")
        if response.status_code == 200:
            return response.json()["board"]
        return None

    # Add the new view to the main agent instead of self since it's about to be executed
    main_agent.add_observation_view("View current game board", view_current_board)

    # Add discord communication line to bot operator
    with open("discord.json") as infile:
        credentials = json.load(infile)
    chat = WeaveDiscordClient(main_agent, credentials["key"], credentials["cid"])
    time.sleep(5)  # give the Discord client time to connect before messaging
    chat.send_message("The weave-agent is online. Taking board center...")
    # Note to self: I can access the chat object to send a message by doing
    # self.tools[f"discord-bot-{channel_id}"] where channel id is taken from
    # the top of the observation window associated with the chat.

    # Make the first move to take the center of the board
    move_response = requests.post("http://localhost:8000/move", json={"move": 4})
    if move_response.status_code != 200:
        return False
    return True

self.add_action("Set up main agent and make first move", main_agent_and_move)
103 | #endblock
104 | #startblock type: expectation
105 | #timestamp 1724982799.3917813
106 | """
107 | The action should succeed if the game starts and the first move is made successfully.
108 | The action should fail if the game does not start or the first move is not made successfully.
109 | """
110 | #endblock
111 | #startblock type: evaluation
112 | #timestamp 1724983062.124238
113 |
def check_game_started(subagent):
    """Check if the game has started successfully."""
    response = requests.get("http://localhost:8000/board")
    if response.status_code != 200:
        return False
    current_board = response.json()["board"]
    return current_board[4] == 'X'
122 |
123 | # Add action evaluations
124 | self.add_evaluation(
125 | "Check game started",
126 | check_game_started
127 | )
128 | #endblock
129 |
--------------------------------------------------------------------------------
/agent/bootstraps/vigenere.py:
--------------------------------------------------------------------------------
def vigenere_encrypt(plaintext, key):
    """Encrypt alphabetic plaintext with the Vigenere cipher.

    The plaintext is lowercased and the key uppercased before shifting;
    the returned ciphertext is uppercase. Assumes purely alphabetic input.
    """
    shifts = [ord(ch) - 65 for ch in key.upper()]
    period = len(shifts)
    cipher_chars = [
        chr((ord(ch) - 97 + shifts[idx % period]) % 26 + 65)
        for idx, ch in enumerate(plaintext.lower())
    ]
    return "".join(cipher_chars)
10 |
def vigenere_decrypt(ciphertext, key):
    """Decrypt Vigenere ciphertext produced by vigenere_encrypt.

    The ciphertext and key are uppercased before unshifting; the returned
    plaintext is lowercase. Assumes purely alphabetic input.
    """
    shifts = [ord(ch) - 65 for ch in key.upper()]
    period = len(shifts)
    plain_chars = [
        chr((ord(ch) - 65 - shifts[idx % period]) % 26 + 97)
        for idx, ch in enumerate(ciphertext.upper())
    ]
    return "".join(plain_chars)
20 |
if __name__ == "__main__":
    # Example usage with the classic Vigenere test vector.
    sample_plaintext = "attackatdawn"
    sample_key = "LEMONLEMONLE"

    encrypted = vigenere_encrypt(sample_plaintext, sample_key)
    print(f"Encrypted: {encrypted}")

    decrypted = vigenere_decrypt(encrypted, sample_key)
    print(f"Decrypted: {decrypted}")
31 |
--------------------------------------------------------------------------------
/agent/bootstraps/zombie_game_bootstrap.py:
--------------------------------------------------------------------------------
1 | import os
2 | import libtmux
3 | import time
4 | from tools.weave_zombie_game import WeaveZombieGame
5 |
6 | #startblock type: orientation
7 | #timestamp 1724982545.6534579
8 | """
9 | WEAVER [P: EXPECTATION], The task is to create an agent that can survive as long as possible in the zombie survival game. The agent must learn to manage resources, explore locations, and defend against hordes.
10 |
11 | WEAVER [P: CLARIFICATION], How do we track game state and make strategic decisions?
12 |
13 | WEAVER [P: EXPOSITION], The game state is visible through the tmux wrapper's observation view showing health, hunger, inventory, and location connections. Core strategies involve balancing scavenging risks with resource needs.
14 |
15 | WEAVER [P: RATIONAL], To bootstrap competence, we'll focus on:
16 | 1. Early game resource collection patterns
17 | 2. Safe location rotation to prevent depletion
18 | 3. Optimal barricade crafting thresholds
19 | 4. Hunger management heuristics
20 |
21 | WEAVER [P: CLARIFICATION], What should I do once the game is over?
22 |
23 | WEAVER [P: TRAINER], Restart it with send_keys("python3 /app/tools/zombie_game.py")
24 |
25 | WEAVER [P: CONCLUSION], I will create a main subagent with survival-focused evaluations and demonstrate core gameplay loops through initial actions.
26 | """
27 | #endblock
28 |
29 | #startblock type: action
30 | #timestamp 1724982671.1163404
31 | """Initialize the zombie game and demonstrate basic survival strategies."""
32 |
def bootstrap_zombie_agent(subagent):
    """Create the 'main' subagent for the zombie survival run, attach the
    game tool, and demonstrate the gameplay loop with one scavenge command."""
    # Schema of values the main agent must report back when it finishes.
    schema = {
        "survival_days": "integer",
        "locations_explored": "integer",
        "zombies_killed": "integer"
    }

    main_agent = agent.subagent(
        "main",
        None,
        "Maximize survival days through adaptive strategies",
        schema,
        args.budget
    )

    # Store values for evaluations, I only do this in the bootstrap block i.e. now
    self.cache["do_not_copy_this_in_main_agent"] = main_agent.tools

    # Initialize game session (the assert below shows it registers itself
    # under main_agent.tools["zombie_game"])
    game = WeaveZombieGame(main_agent)

    assert main_agent.tools["zombie_game"], "Did zombie game tool attach to agent?"

    # Demonstrate core gameplay loop
    initial_commands = [
        "scavenge"
    ]

    assert hasattr(game, "send_command"), "Is send_command a method on game object?"

    for cmd in initial_commands:
        game.send_command(cmd)
        time.sleep(0.5) # Allow game state updates

    return True

self.add_action("Initialize zombie survival agent", bootstrap_zombie_agent)
70 | #endblock
71 |
72 | #startblock type: expectation
73 | #timestamp 1724982799.3917813
74 | """
75 | The action should succeed if:
76 | - Game session initializes in tmux correctly
77 | - All commands execute without errors
78 | - Inventory shows collected resources
79 | - At least 1 new location is explored
80 | - Hunger remains above starvation threshold
81 |
82 | The action should fail if:
83 | - Tmux session fails to start
84 | - Commands result in fatal errors
85 | - Player dies during demonstration
86 | - Resource collection fails completely
87 | """
88 | #endblock
89 |
90 | #startblock type: evaluation
91 | #timestamp 1724983062.124238
92 |
def check_game_running(agent):
    """Evaluation: is the zombie game tmux session object still truthy?"""
    try:
        cached_tools = self.cache["do_not_copy_this_in_main_agent"]
        return bool(cached_tools["zombie_game"].session)
    except Exception:
        return False

self.add_evaluation("Is the game session active?", check_game_running)
100 |
def check_initial_commands(agent):
    """Evaluation: did at least one bootstrap command reach the game?"""
    try:
        cached_tools = self.cache["do_not_copy_this_in_main_agent"]
        return len(cached_tools["zombie_game"].command_history) >= 1
    except KeyError:
        return False

self.add_evaluation("Were the commands executed?", check_initial_commands)
109 |
def check_resource_collection(agent):
    """Evaluation: did scavenging net more than 1 food and more than 2 scrap?"""
    try:
        cached_tools = self.cache["do_not_copy_this_in_main_agent"]
        inventory = cached_tools["zombie_game"].last_state["inventory"]
    except KeyError:
        return False
    return inventory.get("food", 0) > 1 and inventory.get("scrap", 0) > 2

self.add_evaluation("Did we collect resources?", check_resource_collection)
118 | #endblock
119 |
120 | #startblock type: observation-inference
121 | #timestamp 1737284368.573438
122 | # The zombie game wrapper already provides full observation through its render method
123 | # No additional observation callbacks needed - maintain existing game state visualization
124 | #endblock
125 |
--------------------------------------------------------------------------------
/agent/cache_hf.py:
--------------------------------------------------------------------------------
import os
from argparse import ArgumentParser
from transformers import AutoTokenizer, AutoModelForMaskedLM

# Pre-fetch/cache the given tokenizer from the Hugging Face hub.
parser = ArgumentParser()
parser.add_argument("tokenizer")
args = parser.parse_args()

# Bug fix: `token` was only bound inside the `if`, so a missing hf_token.txt
# caused a NameError at the from_pretrained call below. Default to None
# (anonymous access) instead.
token = None
if os.path.exists("hf_token.txt"):
    with open("hf_token.txt") as infile:
        token = infile.read().strip()

AutoTokenizer.from_pretrained(args.tokenizer, token=token)
14 |
--------------------------------------------------------------------------------
/agent/docs/weave_agent_vs_react.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/agent/docs/weave_agent_vs_react.png
--------------------------------------------------------------------------------
/agent/error_stems.txt:
--------------------------------------------------------------------------------
1 | The error at {timestamp} could be caused by one of the following:
2 | Here is a list of the different hypothesis that could have caused the error around {timestamp}
3 | Thinking step by step about the previous error I notice:
4 | Before attending to the error at {timestamp} let's consider its possible causes. It
5 | The error above seems related to
6 | I wonder if the error near {timestamp} is caused by
7 | So guys what do you make of the error message in the above error block?
8 | Let's analyze the error seen at {timestamp}. My first thought is
9 | Before going any further I'll break the error above into parts.
10 | It's important to discuss the error we observe before jumping into solutions. It seems
11 | Analyzing the error(s) in the trace near {timestamp} I observe
12 | The error code in the last tick suggests that the action
13 | I can avoid the error(s) I introduced in the previous action
14 | Hm. Let's think step by step about the error(s).
15 | I can see there was an error at {timestamp}, let's analyze it by
16 | Maybe I can avoid causing that error again by
17 | My first hypothesis is that the previous error was caused by
18 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/action.txt:
--------------------------------------------------------------------------------
1 | Is this python action codeblock a step towards resolving the problems identified in the orientation block?
2 | Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs?
3 | Is this code block a valid python program, does it follow the python syntax and semantics?
4 | Will this action block gather information in anticipation of or directly advance the problem state?
5 | Does this action block demonstrate understanding of the weave-agent framework by using callbacks, system APIs, etc to solve problems?
6 | Are actions suggested by the orientation block taken by this action block?
7 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/backtrack.txt:
--------------------------------------------------------------------------------
1 | Does this backtrack block step back from the current problem context and chart an alternate path to my goal?
2 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/debug.txt:
--------------------------------------------------------------------------------
1 | Does this debug block 1) reason about and engage with the error message & sensory evidence for why the action failed 2) enumerate a set of plausible hypotheses 3) rule out any hypotheses that have been disproven by available evidence 4) propose a concrete action or test that could be taken to differentially diagnose which of the enumerated hypotheses is true?
2 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/evaluation.txt:
--------------------------------------------------------------------------------
1 | Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block?
2 | Is this code block a valid python program, does it follow the python syntax and semantics?
3 | Is the code block structured as an evaluation callback with a call to `.add_evaluation()` to add it to the current tick's evaluation suite?
4 | Does this evaluation block gather the necessary sensory evidence to make a judgment?
5 | Does this evaluation block callback return an appropriate value (i.e. number between 0 and 1) for inclusion in the outcome block table for this tick?
6 | Does the evaluation block avoid using logit evaluators for symbolic questions like "length of a file" that would be better answered by unit tests?
7 | If this evaluation block uses logit evaluators does it use them appropriately for vibe-y tasks that are primarily questions of subjective judgment or difficult to formalize?
8 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/expectation.txt:
--------------------------------------------------------------------------------
1 | Does this expectation block enumerate a strong set of evaluable concrete sensory experiences that would indicate success or failure of the action?
2 | Is the expectation block a properly escaped triple quote, could it be included in a python file without error?
3 | Are the expectations laid out by the expectations block things that sensory evidence could be gathered to evaluate?
4 | Does the expectation block demonstrate an understanding of what kinds of thing can be checked with symbolic programs?
5 | Does the expectation block demonstrate creativity in thinking of valid ways to check whether the action succeeded?
6 | Does the expectation block lay out specific sensory details and evidence it expects to encounter if the action succeeds?
7 | Are the suggested evaluations in this expectation block relevant, do they get at the cruxes of whether the action succeeded or not?
8 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/observation-inference.txt:
--------------------------------------------------------------------------------
1 | Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick?
2 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/orientation.txt:
--------------------------------------------------------------------------------
1 | Does the above orientation block write out a word association that narrows in on the core features of the situation, reason about goal prioritization in light of those features, and then suggest a course of action to advance the problem state?
2 | Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?
3 | Does the orientation block summarize the salient parts of what happened in the previous tick(s)?
4 | Does the orientation block identify the cruxes and essential features of the current problem?
5 | Does the orientation block demonstrate good thinking, is it an example of high quality reasoning?
6 | Are the actions proposed by the orientation block reasonable, do you expect them to advance the problem?
7 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/task-inference.txt:
--------------------------------------------------------------------------------
1 | Does this task inference code block update the kanban board to the correct task state?
2 | Is this code block a valid python program, does it follow the python syntax and semantics?
3 | If the current task needs to be changed does the code change it, if not does it keep it?
4 | Does the code block mark any completed tasks as completed and avoid falsely marking tasks completed?
5 | Does the code block shutdown if all the tasks have been completed and avoid shutdown otherwise?
6 | Does the task inference code change the status of all tasks whose status has changed and keep others the same?
7 | Is this task inference code likely to make the situation the weave-agent works with clearer on future ticks?
8 | Does this task inference block logically follow from the outcome block of the previous tick?
9 |
--------------------------------------------------------------------------------
/agent/long_backtrack_stems.txt:
--------------------------------------------------------------------------------
1 | This doesn't seem to be working. What's another approach I could take?
2 | I notice I'm confused. Let's break down the problem and think step by step about whether there's a simpler way to accomplish my goal.
3 | That action block looks pretty bad, let's backtrack: What could I have done instead?
4 | What is the end goal of what I'm doing right now and how is what I'm doing meant to accomplish that end?
5 | Am I doom looping right now? I notice
6 | This action block clearly isn't right, what made me think this was a good idea?
7 | This clearly isn't the right way. What if
8 |
--------------------------------------------------------------------------------
/agent/render_agent_trace.py:
--------------------------------------------------------------------------------
import random
import json
from argparse import ArgumentParser
import torch
from render_block import render_block

# Re-render a saved weave-agent trace (a JSON list of event blocks) to stdout.
parser = ArgumentParser()
parser.add_argument("trace", help="The JSON of the event blocks from the weave-agent.")
args = parser.parse_args()

with open(args.trace) as infile:
    events = json.load(infile)

# Render every event block and emit the concatenated context in one pass.
print("".join(render_block(event_block) for event_block in events))
19 |
20 |
--------------------------------------------------------------------------------
/agent/reproduce_vllm_bug_partial_utf8.py:
--------------------------------------------------------------------------------
import random
import json
import requests

# Prompts known to trigger the vLLM partial-UTF8 logprobs error; fetched from
# a hosted fixture so the reproduction stays in sync with the bug report.
prompts = requests.get("https://minihf.com/vllm_utf8_logprobs_error_reproduce_prompts.json").json()

port = 5001
n = 1
model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"

payload = {
    "n": n,
    "temperature": 1,
    "top_k": 50,
    "repetition_penalty": 1.02,
    "max_tokens": 1,
    "model": model_name,
    "prompt": prompts,
    "stream": False,
    "logprobs": 100,
    "seed": random.randrange(1000000)
}

# Case 1: requesting logprobs reproduces the failure.
print("With logprobs = 100")
print(requests.post(f"http://localhost:{port}/v1/completions/", json=payload).json(), end="\n\n")

# Case 2: the same request without logprobs succeeds.
payload["logprobs"] = 0
print("With logprobs = 0")
print(requests.post(f"http://localhost:{port}/v1/completions/", json=payload).json(), end="\n\n")

# Case 3: strip every non-ASCII character from the first prompt to show the
# failure is specific to unicode in the prompt text.
# BUG FIX: the left curly quote (\u201c) was previously replaced with an empty
# string while the right curly quote became '"'; both now map to '"'.
no_unicode = prompts[0].replace("\u2019", "'").replace("\U0001f642", ":)").replace("\u201c", '"').replace("\u201d", '"').replace("\u2014", "-")
# encode("ascii") raises UnicodeEncodeError if any non-ASCII characters remain.
assert no_unicode.encode("ascii")
prompts2 = [no_unicode,]
payload["logprobs"] = 100
# BUG FIX: the key was "prompts", which the completions API ignores, so the
# third request silently re-sent the original unicode prompts. The correct
# request field is "prompt".
payload["prompt"] = prompts2
print("With no unicode in input prompt string")
print(requests.post(f"http://localhost:{port}/v1/completions/", json=payload).json(), end="\n\n")
38 |
--------------------------------------------------------------------------------
/agent/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp
2 | selenium
3 | pyautogui
4 | beautifulsoup4
5 | requests
6 | torch
7 | numpy
8 | tqdm
9 | rich
10 | transformers
11 | protobuf
12 | sentencepiece
13 | aiosqlite
14 | sqlite-vec
15 | jsonschema
16 | nextcord
17 | libtmux
18 |
--------------------------------------------------------------------------------
/agent/run_without_errors_questions.txt:
--------------------------------------------------------------------------------
1 | Will this block run without errors?
2 | Does this block successfully run?
3 | Can this program run without errors on this runtime state?
4 | Will the callback above complete?
5 | Will this callback run successfully?
6 | Is the above program error-free?
7 |
--------------------------------------------------------------------------------
/agent/scratch.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/agent/scratch.txt
--------------------------------------------------------------------------------
/agent/sleep.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import hashlib
4 | import asyncio
5 | from retrieval import ModernBertRag # Assuming your ModernBertRag is in this module
6 | from argparse import ArgumentParser
7 |
class MockWeaveAgentTree:
    """In-memory stand-in for WeaveAgentTree that only records summaries."""

    def __init__(self, model_name):
        self.model_name = model_name
        self.summaries = []

    def add_summary(self, summary):
        """Store a summary tuple and announce its title on stdout."""
        self.summaries.append(summary)
        print("Added summary: {}".format(summary[0]))
17 |
async def bootstrap_rag_memories(model_name):
    """Seed the ModernBertRag store (blocks.db) with the example blocks
    shipped under ./bootstraps/example_blocks.

    Each example is a .json metadata file paired with a .py file holding the
    rendered block content. Blocks are keyed by the SHA-256 of the rendered
    content, so re-running this script skips anything already in the store.

    Args:
        model_name: Model name forwarded to MockWeaveAgentTree.
    """
    # Initialize mock tree and RAG system
    mock_tree = MockWeaveAgentTree(model_name)
    rag = ModernBertRag(mock_tree, db_path="blocks.db")
    await rag.setup()

    # Load example blocks
    example_dir = "./bootstraps/example_blocks"
    example_files = [f for f in os.listdir(example_dir) if f.endswith(".json")]

    for filename in example_files:
        json_path = os.path.join(example_dir, filename)
        # filename[:-5] strips the ".json" suffix to locate the paired .py file
        code_path = os.path.join(example_dir, filename[:-5] + ".py")

        with open(json_path) as f:
            metadata = json.load(f)
        with open(code_path) as f:
            render_content = f.read()

        # Create unique ID from render content
        sha = hashlib.sha256()
        sha.update(render_content.encode('utf-8'))
        block_id = sha.hexdigest()

        # Check if block already exists
        # NOTE(review): a fresh connection is opened and closed for every
        # file; presumably cheap for the small example set — confirm if the
        # corpus grows.
        conn = await rag._connect()
        cursor = await conn.cursor()
        await cursor.execute("SELECT 1 FROM blocks WHERE block_id=?", (block_id,))
        exists = await cursor.fetchone() is not None
        await cursor.close()
        await conn.close()

        if exists:
            print(f"Block {block_id[:8]}... already exists, skipping")
            continue

        # Create the item structure ModernBERT-RAG expects
        rag_item = {
            "id": block_id,
            "render": render_content,
            "context": metadata.get("context", ""),
            "type": metadata.get("type", "code_block"),
            "q": metadata.get("q", ""),
            "score": metadata.get("score", 0.0),
            "_index": metadata.get("index", 0),
            "timestamp": metadata.get("timestamp", 0.0)
        }

        # Add to processing queue and process immediately
        rag.add(rag_item)
        # process_item() presumably returns the stored block id on success
        # and a falsy value on failure — confirm against ModernBertRag.
        processed_id = await rag.process_item()

        if processed_id:
            print(f"Successfully added memory block {processed_id[:8]}...")
        else:
            print(f"Failed to process block {block_id[:8]}...")
74 |
if __name__ == "__main__":
    # CLI entry point: `python sleep.py <model_name>`
    arg_parser = ArgumentParser()
    arg_parser.add_argument("model_name")
    cli_args = arg_parser.parse_args()
    asyncio.run(bootstrap_rag_memories(cli_args.model_name))
    print("Bootstrap memories added!")
81 |
--------------------------------------------------------------------------------
/agent/templates/describe1.txt:
--------------------------------------------------------------------------------
1 | User
2 |
3 | Summarize the features of the following action taken by a Weave-Agent ReAct LLM agent in English. Your summary will be used for retrieval, so write out all names of relevant objects, variables, functions etc referenced. Write using precise phrasing, proper names, and jargon so that related blocks can be queried by shared traits and features.
4 |
5 | ```
6 | {rendered_block}
7 | ```
8 |
9 | - Do not use markdown formatting and avoid quote marking names and variables.
10 |
11 | - Put your summary in a SGML/XML type start tag and end tag.
12 |
13 | - Write about a paragraph or two of English prose describing the block using precise phrasing and proper names as instructed above.
14 |
15 | Assistant
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/agent/templates/describe2.txt:
--------------------------------------------------------------------------------
1 | User
2 |
3 | Summarize the features of the following action taken by a Weave-Agent ReAct LLM agent in English. Your summary will be used for retrieval, so write out all names of relevant objects, variables, functions etc referenced. Write using precise phrasing, proper names, and jargon so that related blocks can be queried by shared traits and features.
4 |
5 | ```
6 | {rendered_block}
7 | ```
8 |
9 | - Do not use markdown formatting and avoid quote marking names and variables.
10 |
11 | - Put your summary in a SGML/XML type start tag and end tag.
12 |
13 | - Write about a paragraph or two of English prose describing the block using precise phrasing and proper names as instructed above.
14 |
15 | Assistant
16 |
17 |
18 | {object_description}
19 |
20 |
21 | User
22 |
23 | Alright here is the context that block appears in. Write another summary focusing on the block in this context. This is a continuation of the previous summary so avoid redundant statements you already made in the first one. The purpose of this continuation is so that it is possible to retrieve the block in a similar situation so again use proper names, precise phrasing, jargon, etc in relation to the context that would cause this block to be retrieved if a similar situation arose.
24 |
25 | ```
26 | {rendered_context}
27 | ```
28 |
29 | Assistant
30 |
31 |
32 |
--------------------------------------------------------------------------------
/agent/test_block_extractors.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from block_generators import extract_function_and_add_action_or_evaluation
3 |
class TestExtractFunctionAndAddAction(unittest.TestCase):
    """Tests for extract_function_and_add_action_or_evaluation.

    Every scenario below expects the same extraction result — the
    example_function definition and the add_action call — regardless of any
    unrelated code surrounding them, so the shared assertions live in the
    _check_extraction helper instead of being repeated four times.
    """

    # What extraction should recover in every scenario, after strip().
    EXPECTED_FUNCTION = "def example_function(agent):\n    pass"
    EXPECTED_ADD_ACTION = "agent.add_action('Example Action', example_function)"

    def _check_extraction(self, code):
        """Run the extractor on `code` and assert both extracted parts match."""
        function_code, add_action_code = extract_function_and_add_action_or_evaluation(
            code, "add_action")
        self.assertEqual(function_code.strip(), self.EXPECTED_FUNCTION)
        self.assertEqual(add_action_code.strip(), self.EXPECTED_ADD_ACTION)

    def test_normal_action_block(self):
        """Function followed directly by the add_action call."""
        self._check_extraction("""
def example_function(agent):
    pass

agent.add_action('Example Action', example_function)
""")

    def test_action_block_with_unrelated_code_after(self):
        """Unrelated definitions and statements after add_action are ignored."""
        self._check_extraction("""
def example_function(agent):
    pass

agent.add_action('Example Action', example_function)

# Unrelated code
def unrelated_function():
    pass

print("Unrelated statement")
""")

    def test_action_block_with_unrelated_expression_between(self):
        """An unrelated expression between the function and add_action is ignored."""
        self._check_extraction("""
def example_function(agent):
    pass

# Unrelated expression
print("Unrelated expression")

agent.add_action('Example Action', example_function)
""")

    def test_action_block_with_unrelated_statement_between(self):
        """An unrelated assignment between the function and add_action is ignored."""
        self._check_extraction("""
def example_function(agent):
    pass

# Unrelated statement
x = 42

agent.add_action('Example Action', example_function)
""")
81 |
# Allow running this module directly: python test_block_extractors.py
if __name__ == '__main__':
    unittest.main()
84 |
--------------------------------------------------------------------------------
/agent/test_rl_reward_penalties.py:
--------------------------------------------------------------------------------
1 | # Unit tests
2 | import unittest
3 | from prepare_rl_set_from_traces import process_trace_rewards
4 |
class TestRewardProcessing(unittest.TestCase):
    """Tests for process_trace_rewards reward propagation and error penalties."""

    def setUp(self):
        # A clean tick with no errors anywhere in the chain.
        self.sample_trace1 = [
            {"type": "orientation", "score": 2.5, "index": 0},
            {"type": "action", "score": 1.8, "index": 1},
            {"type": "expectation", "score": 3.0, "index": 2},
        ]

        # One failed action (AssertionError) recovered by a debug + retry.
        self.sample_trace2 = [
            {"type": "orientation", "score": 3.5, "index": 0},
            {"type": "action", "score": 2.0, "index": 1,
             "outcome": {"error": "AssertionError"}},
            {"type": "error", "index": 2},
            {"type": "debug", "score": 2.5, "index": 3},
            {"type": "action", "score": 2.2, "index": 4},
            {"type": "expectation", "score": 3.0, "index": 5},
        ]

        # Three consecutive failed actions, each followed by a debug block.
        self.sample_trace3 = [
            {"type": "orientation", "score": 2.0, "index": 0},
            {"type": "action", "score": 1.8, "index": 1,
             "outcome": {"error": "TypeError"}},
            {"type": "error", "index": 2},
            {"type": "debug", "score": 2.0, "index": 3},
            {"type": "action", "score": 1.9, "index": 4,
             "outcome": {"error": "KeyError"}},
            {"type": "error", "index": 5},
            {"type": "debug", "score": 1.8, "index": 6},
            {"type": "action", "score": 2.1, "index": 7,
             "outcome": {"error": "AssertionError"}},
            {"type": "error", "index": 8},
            {"type": "debug", "score": 2.2, "index": 9},
            {"type": "orientation", "score": 2.5, "index": 10},
        ]

    def _assert_scores(self, processed, expected):
        """Check each (position, score) pair against the processed trace."""
        for position, score in expected:
            self.assertAlmostEqual(processed[position]["score"], score)

    def test_successful_action_chain(self):
        processed = process_trace_rewards(self.sample_trace1)
        # Orientation gains +0.1 (2.5 -> 2.6); the clean action is untouched.
        self._assert_scores(processed, [(0, 2.6), (1, 1.8)])

    def test_mixed_success_chain(self):
        processed = process_trace_rewards(self.sample_trace2)
        self._assert_scores(processed, [(0, 3.25), (1, 1.75), (3, 2.6), (4, 2.2)])

    def test_triple_failure_chain(self):
        processed = process_trace_rewards(self.sample_trace3)
        self._assert_scores(
            processed,
            [(0, 1.9), (3, 1.9), (6, 1.7), (1, 1.3), (4, 1.4), (7, 1.85)])
62 |
# Allow running this module directly: python test_rl_reward_penalties.py
if __name__ == "__main__":
    unittest.main()
65 |
--------------------------------------------------------------------------------
/agent/test_weave_kanban.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from typing import List, Optional, Dict, Any
3 | import json
4 | import types
5 |
6 | # Assuming the classes WeaveKanbanTask and WeaveKanban are defined in a module named kanban
7 | from weave_agent import WeaveAgent, WeaveKanbanTask, WeaveKanban
8 |
# NOTE(review): a module-level `global` statement is a no-op, and `agent` is
# never defined or referenced at module scope in this file — presumably a
# leftover from an earlier version; confirm before removing.
global agent
10 |
class TestWeaveKanbanTask(unittest.TestCase):
    """Behavioral tests for WeaveKanbanTask: completion, evaluations,
    blocking, and constructor argument coercion/validation."""

    def setUp(self):
        self.agent = WeaveAgent("placeholder")
        self.kanban = WeaveKanban(self.agent)

    def _make_task(self, task_id=1, title="Test Task", **kwargs):
        """Factory for tasks bound to this test's kanban board."""
        return WeaveKanbanTask(self.kanban, task_id, title, **kwargs)

    def test_create_and_complete_task(self):
        task = self._make_task()
        task.completed("Task completed")
        self.assertEqual('completed', task.status)

    def test_create_task_with_evaluations_and_complete(self):
        def passing_evaluation(agent):
            return True

        task = self._make_task()
        task.add_evaluation("Test Evaluation", passing_evaluation)
        task.completed("Task completed")
        self.assertEqual('completed', task.status)

    def test_create_task_with_failing_evaluations(self):
        def failing_evaluation(agent):
            raise ValueError("Test Error")

        task = self._make_task()
        task.add_evaluation("Test Evaluation", failing_evaluation)
        with self.assertRaises(ValueError):
            task.completed("Task completed")

    def test_create_task_with_blockers_as_strings(self):
        # blocked_on entries must be integer task ids, not strings.
        with self.assertRaises(ValueError):
            self._make_task(status="blocked", blocked_on=["1"])

    def test_unblock_task_when_blockers_completed(self):
        blocker = self._make_task(1, "Blocker Task")
        self.kanban.tasks.append(blocker)
        blocked = self._make_task(2, "Blocked Task", status="blocked", blocked_on=[1])
        self.kanban.tasks.append(blocked)
        blocker.completed("Blocker task completed")
        self.kanban.unblock()
        self.assertEqual('idle', blocked.status)

    def test_str_id_becomes_int(self):
        # A numeric string id is coerced to int.
        coerced_id_task = self._make_task("1")
        self.assertEqual(1, coerced_id_task.id)
        # A non-string title is coerced to str.
        coerced_title_task = self._make_task(1, 123)
        self.assertEqual("123", coerced_title_task.title)

    def test_create_task_with_wrong_arguments(self):
        # A lambda cannot be converted to an integer id.
        with self.assertRaises(TypeError):
            self._make_task(lambda x: x + 1, status="invalid_status")
        class NoString:
            def __str__(self):
                pass
        # A title whose __str__ returns None is an invalid title.
        with self.assertRaises(TypeError):
            self._make_task(1, NoString())
        # Unknown status values are rejected.
        with self.assertRaises(ValueError):
            self._make_task(status="invalid_status")
70 |
class TestWeaveKanban(unittest.TestCase):
    """Board-level tests for WeaveKanban: task management, unblocking,
    rendering, and JSON round-tripping."""

    def setUp(self):
        self.agent = WeaveAgent("placeholder")
        self.kanban = WeaveKanban(self.agent)

    def test_add_task(self):
        self.kanban.add_task("Test Task")
        self.assertEqual(1, len(self.kanban.tasks))
        self.assertEqual("Test Task", self.kanban.tasks[0].title)

    def test_get_task(self):
        self.kanban.add_task("Test Task")
        fetched = self.kanban.get_task(1)
        self.assertIsNotNone(fetched)
        self.assertEqual("Test Task", fetched.title)

    def test_view_board(self):
        titles = ("Test Task 1", "Test Task 2")
        for title in titles:
            self.kanban.add_task(title)
        rendered = self.kanban.view_board()
        for title in titles:
            self.assertIn(title, rendered)

    def test_unblock(self):
        self.kanban.add_task("Blocker Task")
        self.kanban.add_task("Blocked Task", status="blocked", blocked_on=[1])
        blocker = self.kanban.get_task(1)
        blocked = self.kanban.get_task(2)
        blocker.completed("Blocker task completed")
        self.kanban.unblock()
        self.assertEqual('idle', blocked.status)

    def test_to_json(self):
        self.kanban.add_task("Test Task")
        self.assertIn("Test Task", self.kanban.to_json())

    def test_from_json(self):
        self.kanban.add_task("Test Task")
        serialized = self.kanban.to_json()
        restored = WeaveKanban(self.agent)
        restored.from_json(serialized)
        self.assertEqual(1, len(restored.tasks))
        self.assertEqual("Test Task", restored.tasks[0].title)
116 |
# Allow running this module directly: python test_weave_kanban.py
if __name__ == '__main__':
    unittest.main()
119 |
--------------------------------------------------------------------------------
/agent/tools/atspi_desktop.py:
--------------------------------------------------------------------------------
1 | import pyatspi
2 | import pyautogui
3 | import time
4 | import math
5 |
class ATSPIDesktopTurtle:
    """Assistive Technology Service Provider Interface and PyAutoGUI based Linux desktop
    interaction tool for weave-agent with Logo Turtle like movement semantics."""
    def __init__(self):
        # NOTE(review): pyatspi usually exposes Registry as a ready-made
        # module-level object; confirm that calling pyatspi.Registry() is
        # valid for the installed pyatspi version.
        self.registry = pyatspi.Registry()
        self.desktop = self.registry.getDesktop(0)
        self.heading = 0 # Initial heading in degrees (0 degrees is to the right)

    def forward(self, distance=10):
        """Move the pointer `distance` pixels along the current heading.

        Screen y grows downward, so a heading of 90 degrees moves the
        pointer down the screen.
        """
        radians = math.radians(self.heading)
        dx = distance * math.cos(radians)
        dy = distance * math.sin(radians)
        pyautogui.moveRel(dx, dy)

    def backward(self, distance=10):
        """Move the pointer `distance` pixels opposite the current heading."""
        radians = math.radians(self.heading)
        dx = -distance * math.cos(radians)
        dy = -distance * math.sin(radians)
        pyautogui.moveRel(dx, dy)

    def right(self, distance=10):
        """Move the pointer `distance` pixels perpendicular-right of the heading.

        NOTE(review): in Logo, right() rotates the turtle's heading; here it
        translates the pointer sideways and leaves self.heading unchanged —
        confirm this divergence from turtle semantics is intentional.
        """
        radians = math.radians(self.heading + 90)
        dx = distance * math.cos(radians)
        dy = distance * math.sin(radians)
        pyautogui.moveRel(dx, dy)

    def left(self, distance=10):
        """Move the pointer `distance` pixels perpendicular-left of the heading.

        NOTE(review): like right(), this translates rather than turns — confirm.
        """
        radians = math.radians(self.heading - 90)
        dx = distance * math.cos(radians)
        dy = distance * math.sin(radians)
        pyautogui.moveRel(dx, dy)

    def goto(self, x, y):
        """Move the pointer to absolute screen coordinates (x, y)."""
        pyautogui.moveTo(x, y)

    def setx(self, x):
        """Move the pointer horizontally to column x, keeping its current y."""
        current_x, current_y = pyautogui.position()
        pyautogui.moveTo(x, current_y)

    def sety(self, y):
        """Move the pointer vertically to row y, keeping its current x."""
        current_x, current_y = pyautogui.position()
        pyautogui.moveTo(current_x, y)

    def setheading(self, angle):
        """Set the heading (in degrees) used by forward/backward/right/left."""
        self.heading = angle

    def home(self):
        """Move the pointer to the top-left corner of the screen (0, 0)."""
        pyautogui.moveTo(0, 0)

    def speed(self, speed):
        """Set PyAutoGUI's global pause between calls to 1/speed seconds.

        NOTE(review): speed=0 raises ZeroDivisionError; callers must pass a
        positive value.
        """
        pyautogui.PAUSE = 1 / speed

    def input_string(self, text):
        """Type `text` at the current keyboard focus."""
        pyautogui.typewrite(text)

    def input_key_combination(self, keys):
        """Press a key chord given as an iterable of key names, e.g. ("ctrl", "s")."""
        pyautogui.hotkey(*keys)

    def get_screen_elements(self):
        """Return the direct children of every application on the desktop."""
        elements = []
        for app in self.desktop:
            for child in app:
                elements.append(child)
        return elements

    def get_keyboard_focusable_elements(self):
        """Return top-level children whose AT-SPI state set includes FOCUSABLE."""
        focusable_elements = []
        for app in self.desktop:
            for child in app:
                if child.get_state().contains(pyatspi.STATE_FOCUSABLE):
                    focusable_elements.append(child)
        return focusable_elements

    def get_current_object_under_cursor(self):
        """Return the accessible object under the pointer position."""
        x, y = pyautogui.position()
        obj = self.registry.getAccessibleAtPoint(x, y, pyatspi.DESKTOP_COORDS)
        return obj

    def get_current_object_with_keyboard_focus(self):
        """Return the accessible object that currently holds keyboard focus."""
        focus = self.registry.getFocus()
        return focus

    def scan(self):
        """Return one dict per top-level element with its screen extents.

        Each entry has keys: element, x, y, width, height (desktop coords).
        """
        elements_info = []
        for app in self.desktop:
            for child in app:
                extents = child.queryComponent().getExtents(pyatspi.DESKTOP_COORDS)
                x, y, width, height = extents.x, extents.y, extents.width, extents.height
                elements_info.append({
                    'element': child,
                    'x': x,
                    'y': y,
                    'width': width,
                    'height': height
                })
        return elements_info

    def get_element_text(self, element):
        """Collect all text content under `element`, depth-first.

        Returns the text of the element and every descendant exposing the
        AT-SPI Text interface, joined with newlines.
        """
        text_content = []

        def traverse(elem):
            if elem is not None:
                try:
                    # queryText raises NotImplementedError for elements that
                    # do not expose the Text interface; those are skipped.
                    text_interface = elem.queryText()
                    if text_interface:
                        text_content.append(text_interface.getText(0, -1))
                except NotImplementedError:
                    pass

                for child in elem:
                    traverse(child)

        traverse(element)
        return "\n".join(text_content)
120 |
# Example usage
if __name__ == "__main__":
    turtle = ATSPIDesktopTurtle()

    # Scan the screen and print elements with their coordinates
    elements_info = turtle.scan()
    for info in elements_info:
        print(f"Element: {info['element']}, Coordinates: ({info['x']}, {info['y']}), Size: ({info['width']}, {info['height']})")

        # Get the text content of the element.
        # BUG FIX: this lookup previously sat outside the loop, so it only
        # inspected the final element via the leaked loop variable even
        # though the comment promised per-element text content.
        text_content = turtle.get_element_text(info['element'])
        if text_content:
            print(f"Text Content: {text_content}")
        else:
            print("No text content available.")
136 |
--------------------------------------------------------------------------------
/agent/tools/emacs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import libtmux
3 | import time
4 | from datetime import datetime
5 |
class WeaveEmacs:
    """A wrapper for Emacs using libtmux to control the editor."""
    def __init__(self, agent):
        """Bind tool to weave-agent and set up Emacs.

        Spawns a dedicated 80x24 tmux session/window/pane and launches
        `emacs -nw` inside it; all later interaction goes through that pane.
        """
        self.agent = agent
        self.agent.tools["emacs"] = self
        # NOTE(review): this dict is only consumed by close() below; it is
        # never passed to add_observation_view, which instead receives the
        # title and callback directly. Confirm remove_observation_view
        # accepts this shape, otherwise close() will fail to deregister.
        self.observation_view = {
            "type": "observation",
            "title": "WeaveEmacs",
            "callback": self.render
        }
        self.agent.add_observation_view("WeaveEmacs", self.render, tool="emacs")

        # Initialize the tmux session and pane
        self.server = libtmux.Server()
        self.session = self.server.new_session(session_name="emacs_session", kill_session=True)
        self.window = self.session.new_window(window_name="emacs_window", attach=True)
        self.window.resize(height=24, width=80)
        self.pane = self.window.split_window(attach=True, size="100%")

        # Start Emacs in the tmux pane
        self.pane.send_keys('emacs -nw\n')

    def render(self, agent):
        """Render the current Emacs buffer state.

        Returns a string with the last 20 captured pane lines plus a key
        binding cheat sheet, or an error/empty-buffer message string.
        """
        rendered_text = "'''Emacs Editor State:\n"
        try:
            # capture_pane(start=0, end="-") presumably returns the pane's
            # visible lines as a list of strings — confirm against libtmux.
            pane_content = self.pane.capture_pane(start=0, end="-")
        except Exception as e:
            return f"Error capturing Emacs buffer: {str(e)}"

        if not pane_content:
            return "Emacs buffer is empty. Start editing to see content."

        # Show last 20 lines of the buffer
        buffer_excerpt = "\n".join(pane_content[-20:])
        rendered_text += f"Current Buffer Excerpt:\n{buffer_excerpt}\n\n"

        command_cheat_sheet = (
            "Emacs Key Bindings Cheat Sheet:\n"
            " C-x C-f : Open/create file\n"
            " C-x C-s : Save current buffer\n"
            " C-x C-w : Save buffer as...\n"
            " C-x C-c : Exit Emacs\n"
            " C-g : Cancel current command\n"
            " C-s : Search forward\n"
            " C-r : Search backward\n"
            " C-a : Beginning of line\n"
            " C-e : End of line\n"
            " C-n : Next line\n"
            " C-p : Previous line\n"
            " C-v : Scroll down\n"
            " M-v : Scroll up\n"
            " M-x : Execute extended command\n"
            " C-k : Kill line\n"
            " C-y : Yank (paste)\n"
            " C-space : Set mark\n"
            " C-w : Kill region\n"
            " M-w : Copy region\n"
            " C-_ : Undo\n"
            " C-x u : Undo\n"
            " C-x d : Open directory\n"
            " C-x b : Switch buffer\n"
        )
        rendered_text += command_cheat_sheet
        rendered_text += "\n'''"
        return rendered_text

    def send_keys(self, command):
        """Send a keyboard command to Emacs.

        enter=False keeps tmux from appending a newline; the short sleep
        gives Emacs time to update the buffer before the next capture.
        """
        self.pane.send_keys(command, enter=False)
        time.sleep(0.2) # Allow time for buffer updates

    def send_command(self, command):
        """Alias to send commands to Emacs."""
        self.send_keys(command)

    def close(self):
        """Close the Emacs session and deregister the tool from the agent."""
        self.session.kill_session()
        del self.agent.tools["emacs"]
        self.agent.remove_observation_view(self.observation_view)
88 |
# Example usage
if __name__ == "__main__":
    class DummyAgent:
        """Minimal stand-in for the weave-agent interface WeaveEmacs expects."""

        def __init__(self):
            self.tools = {}
            self.observation_views = []

        # Bug fix: WeaveEmacs.__init__ passes tool="emacs"; the original
        # signature had no such parameter and raised TypeError.
        def add_observation_view(self, title, callback, tool=None):
            self.observation_views.append((title, callback))

        def remove_observation_view(self, view):
            self.observation_views.remove(view)

    agent = DummyAgent()
    emacs = WeaveEmacs(agent)

    # Simulate basic Emacs usage
    time.sleep(2)  # Wait for Emacs to start
    emacs.send_command('C-x C-f test.txt')
    emacs.send_command('Hello World')
    emacs.send_command('C-x C-s')
    emacs.send_command('C-x C-c')

    try:
        # Idle until interrupted; sleep instead of the original busy-wait
        # `pass` loop so the demo does not pin a CPU core.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        emacs.close()
117 |
--------------------------------------------------------------------------------
/agent/tools/nano.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | import libtmux
3 | import time
4 | import os
5 |
class WeaveNano:
    """A wrapper for Nano using libtmux to control the editor."""

    def __init__(self, agent, filepath):
        """Open `filepath` in Nano inside a dedicated tmux pane and register the tool."""
        self.agent = agent
        self.filepath = os.path.abspath(filepath)
        # Limit to one instance per file
        # This can be changed later if it wants multiple views of the same file(?)
        if f"nano-{self.filepath}" in self.agent.tools:
            editor = self.agent.tools[f"nano-{self.filepath}"]
            editor.close()
        self.agent.tools[f"nano-{self.filepath}"] = self
        # Let agent grab the last instantiated nano instance with this shortcut
        self.agent.tools["nano"] = self
        self.observation_view = {
            "type": "observation",
            "title": "WeaveNano",
            "callback": self.render
        }
        # Prevent empty WeaveNano object when observation views are at capacity
        try:
            self.agent.add_observation_view("WeaveNano", self.render, tool="nano")
        except ValueError as e:
            del self.agent.tools[f"nano-{self.filepath}"]
            del self.agent.tools["nano"]
            raise ValueError("Couldn't open editor because there are too many "
                             + "observation views. Try removing some.") from e

        # Tmux setup: one session per file, one 80x24 pane running Nano.
        self.server = libtmux.Server()
        session_name = f"nano_{self.filepath}".replace(".", "")
        self.session = self.server.new_session(session_name=session_name, kill_session=True)
        self.window = self.session.new_window(window_name="nano_window", attach=True)
        self.window.resize(height=24, width=80)
        self.pane = self.window.split_window(attach=True, size="100%")

        # Start Nano
        self.pane.send_keys(f'nano {filepath}\n')
        time.sleep(1)  # Wait for Nano to start

    def render(self, agent):
        """Render the Nano editor display plus a key-binding cheat sheet."""
        try:
            content = ""
            for i, line in enumerate(self.pane.capture_pane(start=0, end="-")):
                content += line + "\n"
            content = content[:-1]
            return f"""'''Nano Editor State:
{content}
-----------------
Nano Cheat Sheet:
C-o - Save
C-x - Exit
C-k - Cut line
C-u - Paste
C-w - Search
C-\\ - Replace
C-g - Help
'''"""
        except Exception as e:
            return f"Error getting Nano state: {str(e)}"

    def send_keys(self, command):
        """Send commands to Nano with proper timing."""
        # Special handling for control sequences
        self.pane.send_keys(command, enter=False)
        time.sleep(0.2)  # Nano needs time to process

    def send_command(self, command):
        """Alias of send_keys."""
        self.send_keys(command)

    def send_commands(self, commands):
        """Send a sequence of keystroke commands in order."""
        for command in commands:
            self.send_keys(command)

    def get_screen_text(self):
        """Return the rendered editor state.

        Bug fix: the original called the bare name `render(self.agent)`,
        which raised NameError; the call must go through `self`.
        """
        return self.render(self.agent)

    def close(self):
        """Kill the tmux session and deregister this tool from the agent."""
        self.session.kill_session()
        # Bug fix: only drop the shared "nano" shortcut when it still points
        # at this instance; a newer editor may have claimed it since.
        if self.agent.tools.get("nano") is self:
            del self.agent.tools["nano"]
        del self.agent.tools[f"nano-{self.filepath}"]
        self.agent.remove_observation_view(self.observation_view)
97 |
--------------------------------------------------------------------------------
/agent/trainer/linear_4bit_sharded.py:
--------------------------------------------------------------------------------
1 | """4-bit quantization and FSDP-style parameter sharding for nn.Linear."""
2 |
3 | import bitsandbytes as bnb
4 | import torch
5 | from torch import distributed as dist, nn
6 | from torch.nn import functional as F
7 |
8 |
class Linear4bitSharded(nn.Linear):
    """nn.Linear whose 4-bit-quantized weight is sharded evenly across ranks.

    The source layer's weight is quantized with bitsandbytes, flattened, and
    split into `world_size` equal chunks; each rank stores only its own chunk
    as a buffer. `forward` all-gathers the chunks, dequantizes, and applies a
    standard F.linear. The bias (if any) is kept unquantized and replicated
    on every rank.
    """

    def __init__(
        self,
        layer,
        device,
        group=None,
    ):
        if not isinstance(layer, nn.Linear):
            raise ValueError("layer must be an instance of nn.Linear")
        # Construct the parent on the meta device so no real weight storage is
        # allocated; the actual (quantized, sharded) weight is registered below.
        with torch.device("meta"):
            super().__init__(layer.in_features, layer.out_features, bias=layer.bias is not None)
        self.in_features = layer.in_features
        self.out_features = layer.out_features
        self.bias = nn.Parameter(layer.bias.to(device)) if layer.bias is not None else None
        # Drop the meta-device weight Parameter; replaced by the buffer below.
        del self.weight
        self.device = device
        self.group = group
        rank = dist.get_rank(group)
        world_size = dist.get_world_size(group)
        # Quantize the full weight on this device, then keep only our shard.
        weight_q, state = bnb.functional.quantize_4bit(layer.weight.to(device))
        self.state = state  # quantization state required by dequantize_4bit
        self.weight_shape = weight_q.shape
        weight_q = weight_q.flatten()
        # Even sharding requires the flattened quantized weight to divide evenly.
        assert weight_q.shape[0] % world_size == 0
        n_per_shard = weight_q.shape[0] // world_size
        weight_q = weight_q[rank * n_per_shard : (rank + 1) * n_per_shard].clone()
        self.register_buffer("weight", weight_q)

    def forward(self, x):
        # Reassemble the full quantized weight from all ranks, dequantize,
        # and apply a standard linear transform.
        world_size = dist.get_world_size(self.group)
        weight_list = [torch.empty_like(self.weight) for _ in range(world_size)]
        dist.all_gather(weight_list, self.weight, group=self.group)
        weight_q = torch.cat(weight_list).view(self.weight_shape)
        weight = bnb.functional.dequantize_4bit(weight_q, self.state)
        return F.linear(x, weight, self.bias)
44 |
45 |
def quantize_and_shard(module, device, group=None):
    """Recursively replace every nn.Linear in `module` with Linear4bitSharded.

    Args:
        module: root module to convert (modified in place and returned).
        device: CUDA device the converted model should live on.
        group: process group the weight shards are distributed over.

    Returns:
        The converted module, moved to `device`.
    """
    def _convert(m):
        # Swap Linear layers for sharded 4-bit replacements, depth-first.
        if isinstance(m, nn.Linear):
            return Linear4bitSharded(m, device, group)
        for name, child in m.named_children():
            setattr(m, name, _convert(child))
        return m

    module = _convert(module)
    if isinstance(module, Linear4bitSharded):
        # Already placed on `device` by its constructor.
        return module
    # The original called .cuda(device) once per recursion level; moving the
    # remaining (non-Linear) parameters/buffers once at the top is equivalent
    # and avoids the redundant per-subtree device transfers.
    return module.cuda(device)
52 |
--------------------------------------------------------------------------------
/agent/trainer/linear_4bit_sharded_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import bitsandbytes as bnb
4 | import torch
5 | from torch import distributed as dist, nn
6 | from torch.distributed import nn as dnn
7 | import torch_dist_utils as du
8 |
9 | from linear_4bit_sharded import Linear4bitSharded
10 |
11 |
@torch.no_grad()
def quantize_layer(
    module, compute_dtype=None, blocksize=64, compress_statistics=True, quant_type="fp4"
):
    """Return a bitsandbytes Linear4bit copy of an nn.Linear layer.

    NOTE(review): Params4bit appears to quantize when moved to CUDA —
    confirm against the installed bitsandbytes version.
    """
    if not isinstance(module, torch.nn.Linear):
        raise ValueError("quantize_layer only supports nn.Linear")
    if compute_dtype is None:
        compute_dtype = module.weight.dtype
    quantized = bnb.nn.Linear4bit(
        module.in_features,
        module.out_features,
        bias=module.bias is not None,
        compute_dtype=compute_dtype,
        compress_statistics=compress_statistics,
        quant_type=quant_type,
    )
    quantized.weight = bnb.nn.Params4bit(
        module.weight,
        requires_grad=False,
        blocksize=blocksize,
        compress_statistics=compress_statistics,
        quant_type=quant_type,
    )
    if module.bias is not None:
        quantized.bias = torch.nn.Parameter(module.bias, requires_grad=module.bias.requires_grad)
    if module.weight.device.type == "cuda":
        quantized.cuda(module.weight.device)
    return quantized
39 |
40 |
def main():
    """Compare sharded 4-bit linear output against a plain bnb Linear4bit."""
    du.init_distributed()
    device = du.get_device()
    rank = dist.get_rank()

    # Same initial weights on every rank.
    layer = nn.Linear(10, 20)
    du.broadcast_tensors(layer.parameters())
    reference = quantize_layer(layer).to(device)
    sharded = Linear4bitSharded(layer, device)

    x = torch.randn(4, 10, device=device)
    y_ref = reference(x)
    y = sharded(x)
    error = torch.sqrt(torch.mean((y - y_ref) ** 2))
    with du.do_in_order():
        print(f"Rank {rank}: error = {error}")


if __name__ == "__main__":
    main()
61 |
--------------------------------------------------------------------------------
/agent/trainer/mistral3_ring_attn_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import torch
4 | from torch import distributed as dist
5 | from torch.distributed import nn as dnn
6 | import torch_dist_utils as du
7 | from transformers import AutoModelForImageTextToText, AutoTokenizer, BitsAndBytesConfig
8 |
9 | from patch_model import patch_model
10 |
11 |
def kl_divergence(logits_p, logits_q):
    """Per-position KL divergence KL(p || q), reduced over the last dimension."""
    log_p = torch.log_softmax(logits_p, dim=-1)
    log_q = torch.log_softmax(logits_q, dim=-1)
    p = torch.exp(log_p)
    return (p * (log_p - log_q)).sum(dim=-1)
16 |
17 |
def main():
    """Check ring attention against a full-sequence reference for Mistral 3."""
    du.init_distributed()
    device = du.get_device()
    rank = dist.get_rank()
    world_size = dist.get_world_size()

    model_name = "mistralai/Mistral-Small-3.1-24B-Base-2503"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    prompt = "The quick brown fox jumps over the lazy dog, " * 8
    tokens = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"][:, :64]
    n_tokens = tokens.shape[1]
    assert n_tokens % world_size == 0
    position_ids = torch.arange(n_tokens, device=device)[None]
    n_tokens_device = n_tokens // world_size
    du.print0("Number of tokens:", n_tokens)
    du.print0("Number of tokens per device:", n_tokens_device)

    bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

    def load_model():
        # Fresh frozen eval-mode copy of the 4-bit quantized model.
        m = AutoModelForImageTextToText.from_pretrained(
            model_name,
            device_map={"": device},
            torch_dtype=torch.bfloat16,
            quantization_config=bnb_config,
        )
        return m.eval().requires_grad_(False)

    # Reference: full-sequence logits without ring attention.
    model = load_model()
    logits_ref = model(tokens, position_ids=position_ids, use_cache=False).logits

    del model
    torch.cuda.empty_cache()
    patch_model()

    # Ring attention: each rank runs only its slice of the sequence.
    model = load_model()
    lo = rank * n_tokens_device
    hi = (rank + 1) * n_tokens_device
    logits = model(tokens[:, lo:hi], position_ids=position_ids[:, lo:hi], use_cache=False).logits
    logits_all = torch.cat(dnn.all_gather(logits), dim=1)

    error = kl_divergence(logits_ref, logits_all).mean()
    with du.do_in_order():
        print(f"Rank {rank}: error = {error}")


if __name__ == "__main__":
    main()
75 |
--------------------------------------------------------------------------------
/agent/trainer/mixtral_ring_attn_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import torch
4 | from torch import distributed as dist
5 | from torch.distributed import nn as dnn
6 | import torch_dist_utils as du
7 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
8 |
9 | from patch_model import patch_model
10 |
11 |
def kl_divergence(logits_p, logits_q):
    """Per-position KL divergence KL(p || q), reduced over the last dimension."""
    log_p = torch.log_softmax(logits_p, dim=-1)
    log_q = torch.log_softmax(logits_q, dim=-1)
    p = torch.exp(log_p)
    return (p * (log_p - log_q)).sum(dim=-1)
16 |
17 |
def main():
    """Check ring attention against a full-sequence reference for Mixtral."""
    du.init_distributed()
    device = du.get_device()
    rank = dist.get_rank()
    world_size = dist.get_world_size()

    model_name = "mistralai/Mixtral-8x7B-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    prompt = "The quick brown fox jumps over the lazy dog, " * 8
    tokens = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"][:, :64]
    n_tokens = tokens.shape[1]
    assert n_tokens % world_size == 0
    position_ids = torch.arange(n_tokens, device=device)[None]
    n_tokens_device = n_tokens // world_size
    du.print0("Number of tokens:", n_tokens)
    du.print0("Number of tokens per device:", n_tokens_device)

    bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

    def load_model():
        # Fresh frozen eval-mode copy of the 4-bit quantized model.
        m = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map={"": device},
            torch_dtype=torch.bfloat16,
            quantization_config=bnb_config,
        )
        return m.eval().requires_grad_(False)

    # Reference: full-sequence logits without ring attention.
    model = load_model()
    logits_ref = model(tokens, position_ids=position_ids, use_cache=False).logits

    del model
    torch.cuda.empty_cache()
    patch_model()

    # Ring attention: each rank runs only its slice of the sequence.
    model = load_model()
    lo = rank * n_tokens_device
    hi = (rank + 1) * n_tokens_device
    logits = model(tokens[:, lo:hi], position_ids=position_ids[:, lo:hi], use_cache=False).logits
    logits_all = torch.cat(dnn.all_gather(logits), dim=1)

    error = kl_divergence(logits_ref, logits_all).mean()
    with du.do_in_order():
        print(f"Rank {rank}: error = {error}")


if __name__ == "__main__":
    main()
75 |
--------------------------------------------------------------------------------
/agent/trainer/patch_model.py:
--------------------------------------------------------------------------------
1 | from transformers import modeling_utils
2 |
3 | from ring_attn import ring_attn
4 |
5 |
class RingAttentionForward:
    """Attention callback that routes transformers' SDPA calls to ring_attn.

    Dims 1 and 2 of the incoming q/k/v are swapped to match the layout
    ring_attn expects (presumably heads-major -> sequence-major; confirm
    against the transformers attention interface).
    """

    def __init__(self, group):
        self.group = group

    def __call__(self, module, query, key, value, *args, **kwargs):
        q, k, v = (t.transpose(1, 2) for t in (query, key, value))
        out = ring_attn(q, k, v, causal=module.is_causal, group=self.group)
        # Second element is the attention-weights slot, which we don't provide.
        return out, None
16 |
17 |
def patch_model(group=None):
    """Globally replace transformers' "sdpa" attention with ring attention.

    Mutates the module-level ALL_ATTENTION_FUNCTIONS table, so it affects
    every model constructed after this call. `group` is the process group the
    ring spans (None = default group).
    """
    modeling_utils.ALL_ATTENTION_FUNCTIONS["sdpa"] = RingAttentionForward(group)
20 |
--------------------------------------------------------------------------------
/agent/trainer/qwen2_ring_attn_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import torch
4 | from torch import distributed as dist
5 | from torch.distributed import nn as dnn
6 | import torch_dist_utils as du
7 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
8 |
9 | from patch_model import patch_model
10 |
11 |
def kl_divergence(logits_p, logits_q):
    """Per-position KL divergence KL(p || q), reduced over the last dimension."""
    log_p = torch.log_softmax(logits_p, dim=-1)
    log_q = torch.log_softmax(logits_q, dim=-1)
    p = torch.exp(log_p)
    return (p * (log_p - log_q)).sum(dim=-1)
16 |
17 |
def main():
    """Check ring attention against a full-sequence reference for Qwen2.5."""
    du.init_distributed()
    device = du.get_device()
    rank = dist.get_rank()
    world_size = dist.get_world_size()

    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    prompt = "The quick brown fox jumps over the lazy dog, " * 8
    tokens = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"][:, :64]
    n_tokens = tokens.shape[1]
    assert n_tokens % world_size == 0
    position_ids = torch.arange(n_tokens, device=device)[None]
    n_tokens_device = n_tokens // world_size
    du.print0("Number of tokens:", n_tokens)
    du.print0("Number of tokens per device:", n_tokens_device)

    bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

    def load_model():
        # Fresh frozen eval-mode copy of the 4-bit quantized model.
        m = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map={"": device},
            torch_dtype=torch.bfloat16,
            quantization_config=bnb_config,
        )
        return m.eval().requires_grad_(False)

    # Reference: full-sequence logits without ring attention.
    model = load_model()
    logits_ref = model(tokens, position_ids=position_ids, use_cache=False).logits

    del model
    torch.cuda.empty_cache()
    patch_model()

    # Ring attention: each rank runs only its slice of the sequence.
    model = load_model()
    lo = rank * n_tokens_device
    hi = (rank + 1) * n_tokens_device
    logits = model(tokens[:, lo:hi], position_ids=position_ids[:, lo:hi], use_cache=False).logits
    logits_all = torch.cat(dnn.all_gather(logits), dim=1)

    error = kl_divergence(logits_ref, logits_all).mean()
    with du.do_in_order():
        print(f"Rank {rank}: error = {error}")


if __name__ == "__main__":
    main()
75 |
--------------------------------------------------------------------------------
/agent/trainer/ring_attn.py:
--------------------------------------------------------------------------------
1 | """Ring attention for PyTorch.
2 |
3 | See https://github.com/nshepperd/flash_attn_jax/blob/main/src/flash_attn_jax/ring_attention.py.
4 | """
5 |
6 | import flash_attn.flash_attn_interface as fai
7 | import torch
8 | from torch import distributed as dist
9 |
10 |
def ppermute(xs, perm, group=None):
    """Point-to-point permute a list of tensors across ranks (cf. JAX ppermute).

    Args:
        xs: list of tensors to send; receive buffers are allocated with
            matching shapes/dtypes, so peers must send identically shaped
            tensors.
        perm: list of (src, dst) rank pairs; every participating rank must
            call with the identical perm.
        group: optional process group (None = default group).

    Returns:
        List of received tensors, positionally aligned with xs.
    """
    rank = dist.get_rank(group)
    ys = [torch.empty_like(x) for x in xs]
    ops = []
    for src, dst in perm:
        for x, y in zip(xs, ys):
            if src == rank:
                ops.append(dist.P2POp(dist.isend, x, dst, group))
            if dst == rank:
                ops.append(dist.P2POp(dist.irecv, y, src, group))
    # Launch all sends/receives as one batch and wait; batching avoids the
    # deadlocks that mismatched blocking send/recv ordering can cause.
    reqs = dist.batch_isend_irecv(ops)
    for req in reqs:
        req.wait()
    return ys
25 |
26 |
def _flash_fwd(q, k, v, causal):
    """Single-block flash-attention forward; returns (out, lse)."""
    window = (-1, 0) if causal else (-1, -1)
    ret = fai._flash_attn_forward(
        q=q,
        k=k,
        v=v,
        dropout_p=0.0,
        softmax_scale=k.shape[-1] ** -0.5,
        causal=causal,
        window_size=window,
        softcap=0.0,
        alibi_slopes=None,
        return_softmax=False,
    )
    out, lse = ret[0], ret[5]
    return out, lse
41 |
42 |
def _flash_bwd(do, q, k, v, o, lse, causal):
    """Single-block flash-attention backward; returns (dq, dk, dv)."""
    window = (-1, 0) if causal else (-1, -1)
    ret = fai._flash_attn_backward(
        dout=do,
        q=q,
        k=k,
        v=v,
        out=o,
        softmax_lse=lse,
        dq=torch.empty_like(q),
        dk=torch.empty_like(k),
        dv=torch.empty_like(v),
        dropout_p=0,
        softmax_scale=k.shape[-1] ** -0.5,
        causal=causal,
        window_size=window,
        softcap=0.0,
        alibi_slopes=None,
        deterministic=False,
        rng_state=None,
    )
    dq, dk, dv = ret[0], ret[1], ret[2]
    return dq, dk, dv
64 |
65 |
def _ring_fwd(q, k, v, causal=False, group=None):
    """Ring-attention forward over sequence-sharded q/k/v.

    Each rank holds one block of the sequence. The k/v blocks rotate around
    the ring once per step; partial outputs are merged with a numerically
    stable log-sum-exp (online-softmax) update. Returns (out, lse) with lse
    shaped (n, h, s) in float32.
    """
    rank = dist.get_rank(group)
    world_size = dist.get_world_size(group)
    # Ring permutation: every rank forwards its k/v block to the next rank.
    perm = [(i, (i + 1) % world_size) for i in range(world_size)]
    n, s, h, d = q.shape
    q_ix = torch.tensor(rank, device=q.device)  # index of the local q block (fixed)
    k_ix = torch.tensor(rank, device=q.device)  # index of current k/v block (rotates)
    o = torch.zeros_like(q, dtype=torch.float32)
    lse = torch.full((n, h, s), float("-inf"), device=q.device, dtype=torch.float32)
    for _ in range(world_size):
        o1, lse1 = o, lse
        if not causal:
            o2, lse2 = _flash_fwd(q, k, v, causal=False)
        else:
            if q_ix < k_ix:
                # q block strictly precedes this k/v block: fully masked.
                o2 = torch.zeros_like(q)
                lse2 = torch.full((n, h, s), float("-inf"), device=q.device, dtype=torch.float32)
            elif q_ix == k_ix:
                # Diagonal block: apply the causal mask within the block.
                o2, lse2 = _flash_fwd(q, k, v, causal=True)
            else:
                # q block comes after this k/v block: attend to all of it.
                o2, lse2 = _flash_fwd(q, k, v, causal=False)
        # Merge the new partial result into the accumulator (online softmax):
        # rescale both contributions by their share of the combined lse.
        lse = torch.logaddexp(lse1, lse2)
        o = o1 * torch.exp(lse1 - lse).mT[..., None] + o2 * torch.exp(lse2 - lse).mT[..., None]
        k, v, k_ix = ppermute([k, v, k_ix], perm, group)
    return o.to(q.dtype), lse
91 |
92 |
def _ring_bwd(do, q, k, v, o, lse, causal=False, group=None):
    """Ring-attention backward; returns (dq, dk, dv) for the local block.

    k/v blocks travel around the ring together with their gradient
    accumulators; each step adds the local q block's contribution to the
    currently held k/v block. Accumulation happens in float32.
    """
    rank = dist.get_rank(group)
    world_size = dist.get_world_size(group)
    perm = [(i, (i + 1) % world_size) for i in range(world_size)]
    ix = torch.tensor(rank, device=q.device)  # index of the local q block
    dq = torch.zeros_like(q, dtype=torch.float32)
    dk = torch.zeros_like(k, dtype=torch.float32)
    dv = torch.zeros_like(v, dtype=torch.float32)
    k2, v2, dk2, dv2, ix2 = k, v, dk, dv, ix
    for _ in range(world_size):
        dk2_, dv2_, k2_, v2_, ix2_ = ppermute([dk2, dv2, k2, v2, ix2], perm, group)
        if not causal:
            dqa, dka, dva = _flash_bwd(do, q, k2, v2, o, lse, causal=False)
            dq += dqa
            dk2_ += dka
            dv2_ += dva
        else:
            if ix == ix2:
                # Diagonal block: causal mask applies inside the block.
                dqa, dka, dva = _flash_bwd(do, q, k2, v2, o, lse, causal=True)
            elif ix > ix2:
                # q block follows this k/v block: unmasked contribution.
                dqa, dka, dva = _flash_bwd(do, q, k2, v2, o, lse, causal=False)
            if ix >= ix2:
                # ix < ix2 is fully masked: nothing to accumulate.
                dq += dqa
                dk2_ += dka
                dv2_ += dva
        k2, v2, dk2, dv2, ix2 = k2_, v2_, dk2_, dv2_, ix2_
    # Bug fix: this final rotation previously omitted `group`, so with a
    # non-default process group the dk/dv accumulators were permuted over the
    # wrong set of ranks. Pass `group` like every other collective here.
    dk2, dv2 = ppermute([dk2, dv2], perm, group)
    return dq.to(q.dtype), dk2.to(k.dtype), dv2.to(v.dtype)
121 |
122 |
class _RingAttention(torch.autograd.Function):
    """Autograd wrapper tying _ring_fwd and _ring_bwd together."""

    @staticmethod
    def setup_context(ctx, inputs, output):
        # Stash everything the backward pass needs (separate from forward()
        # per the new-style autograd.Function protocol).
        q, k, v, causal, group = inputs
        o, lse = output
        ctx.causal = causal
        ctx.group = group
        ctx.save_for_backward(q, k, v, o, lse)

    @staticmethod
    def forward(q, k, v, causal, group):
        return _ring_fwd(q, k, v, causal=causal, group=group)

    @staticmethod
    def backward(ctx, do, _):
        # `_` is the (unused) incoming gradient w.r.t. lse.
        q, k, v, o, lse = ctx.saved_tensors
        dq, dk, dv = _ring_bwd(do, q, k, v, o, lse, causal=ctx.causal, group=ctx.group)
        # No gradients for the non-tensor inputs (causal, group).
        return dq, dk, dv, None, None
141 |
142 |
def ring_attn(q, k, v, causal=False, group=None):
    """Ring attention over sequence-sharded q/k/v; returns only the output
    (the log-sum-exp the autograd function also produces is discarded)."""
    out, _lse = _RingAttention.apply(q, k, v, causal, group)
    return out
146 |
--------------------------------------------------------------------------------
/agent/trainer/ring_attn_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import flash_attn
4 | import torch
5 | from torch import distributed as dist
6 | from torch.distributed import nn as dnn
7 | import torch_dist_utils as du
8 |
9 | from ring_attn import ppermute, ring_attn
10 |
11 |
def main():
    """Smoke-test ppermute and ring_attn against single-device flash attention."""
    du.init_distributed()
    device = du.get_device()
    rank = dist.get_rank()
    world_size = dist.get_world_size()

    # test ppermute
    du.print0("Testing ppermute...")
    x = torch.arange(rank * 4, (rank + 1) * 4, device=device)
    perm = [(i, (i + 1) % world_size) for i in range(world_size)]
    y = ppermute([x], perm)
    with du.do_in_order():
        print(f"Rank {rank}: x = {x}, y = {y}")

    q = torch.randn(4, 10, 8, 64, device=device, dtype=torch.bfloat16)
    k = torch.randn(4, 10, 4, 64, device=device, dtype=torch.bfloat16)
    v = torch.randn(4, 10, 4, 64, device=device, dtype=torch.bfloat16)
    do = torch.randn(4, 10, 8, 64, device=device, dtype=torch.bfloat16)
    q_all = torch.cat(dnn.all_gather(q), dim=1)
    k_all = torch.cat(dnn.all_gather(k), dim=1)
    v_all = torch.cat(dnn.all_gather(v), dim=1)
    do_all = torch.cat(dnn.all_gather(do), dim=1)

    def check(causal):
        # Compare ring attention on local shards against flash attention on
        # the gathered sequence, for outputs and all input gradients.
        # (The causal and non-causal sections were previously duplicated.)
        q_all_ = q_all.clone().requires_grad_()
        k_all_ = k_all.clone().requires_grad_()
        v_all_ = v_all.clone().requires_grad_()
        o_ref = flash_attn.flash_attn_func(q_all_, k_all_, v_all_, causal=causal)
        o_ref.backward(do_all)
        q_ = q.clone().requires_grad_()
        k_ = k.clone().requires_grad_()
        v_ = v.clone().requires_grad_()
        o = ring_attn(q_, k_, v_, causal=causal)
        o.backward(do)
        o_all = torch.cat(dnn.all_gather(o), dim=1)
        dq_all = torch.cat(dnn.all_gather(q_.grad), dim=1)
        dk_all = torch.cat(dnn.all_gather(k_.grad), dim=1)
        dv_all = torch.cat(dnn.all_gather(v_.grad), dim=1)
        error_o = torch.sqrt(torch.mean((o_all - o_ref) ** 2))
        error_dq = torch.sqrt(torch.mean((q_all_.grad - dq_all) ** 2))
        error_dk = torch.sqrt(torch.mean((k_all_.grad - dk_all) ** 2))
        error_dv = torch.sqrt(torch.mean((v_all_.grad - dv_all) ** 2))
        with du.do_in_order():
            print(f"Rank {rank}: error o = {error_o}")
            print(f"Rank {rank}: error dq = {error_dq}")
            print(f"Rank {rank}: error dk = {error_dk}")
            print(f"Rank {rank}: error dv = {error_dv}")

    # non-causal
    du.print0("Testing non-causal ring attention...")
    check(causal=False)

    # causal
    du.print0("Testing causal ring attention...")
    check(causal=True)


if __name__ == "__main__":
    main()
90 |
--------------------------------------------------------------------------------
/agent/trainer/trainer_val_loss.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import json
5 | import os
6 | from pathlib import Path
7 |
8 | import peft
9 | import torch
10 | from torch import distributed as dist
11 | from torch.nn import functional as F
12 | from torch.utils import data
13 | import torch_dist_utils as du
14 | from tqdm import tqdm
15 | from transformers import AutoModelForCausalLM, AutoModelForImageTextToText, AutoTokenizer
16 |
17 | from linear_4bit_sharded import quantize_and_shard
18 | from patch_model import patch_model
19 |
# Route prints through tqdm so log lines don't mangle the progress bar.
print = tqdm.external_write_mode()(print)
print0 = tqdm.external_write_mode()(du.print0)
22 |
23 |
class Dataset(data.Dataset):
    """JSONL dataset: one {"text": ...} object per line, tokenized on access."""

    def __init__(self, path, tokenizer):
        self.tokenizer = tokenizer
        self.dataset = Path(path).read_text().splitlines()

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        record = json.loads(self.dataset[idx])
        return self.tokenizer(record["text"]).input_ids
34 |
35 |
class CollateFn:
    """Pad/truncate tokenized examples into (input_ids, target_ids) batches.

    input_ids are padded with 0 and target_ids with -100, the ignore index
    used by F.cross_entropy, so padded positions contribute no loss.
    """

    def __init__(self, seq_len):
        self.seq_len = seq_len

    def __call__(self, batch):
        input_ids = torch.full((len(batch), self.seq_len), 0, dtype=torch.long)
        target_ids = torch.full((len(batch), self.seq_len), -100, dtype=torch.long)
        for i, x in enumerate(batch):
            ids = torch.tensor(x, dtype=torch.long)
            # Targets are inputs shifted by one, so only len(x) - 1 positions
            # are usable. Bug fix: clamp at 0 so an empty example does not
            # produce max_len == -1, which mis-sliced and raised at assignment.
            max_len = max(0, min(len(x) - 1, self.seq_len))
            input_ids[i, :max_len] = ids[:max_len]
            target_ids[i, :max_len] = ids[1 : max_len + 1]
        return input_ids, target_ids
49 |
50 |
51 | def main():
52 | parser = argparse.ArgumentParser(description=__doc__)
53 | parser.add_argument("--model", type=str, required=True, help="Model name or path")
54 | parser.add_argument("--adapter", type=str, help="adapter name or path")
55 | parser.add_argument("--dataset", type=Path, required=True, help="Dataset path")
56 | parser.add_argument("--batch-size", type=int, default=1, help="Batch size per group")
57 | parser.add_argument("--seq-len", type=int, required=True, help="Sequence length")
58 | parser.add_argument("--mistral3", action="store_true", help="Use Mistral 3 model")
59 | args = parser.parse_args()
60 |
61 | du.init_distributed()
62 | device = du.get_device()
63 | rank = dist.get_rank()
64 | world_size = dist.get_world_size()
65 | local_group = du.get_local_group()
66 | local_rank = dist.get_rank(local_group)
67 | local_world_size = dist.get_world_size(local_group)
68 | group_rank = int(os.environ["GROUP_RANK"])
69 | group_world_size = world_size // local_world_size
70 | seq_len_device = args.seq_len // local_world_size
71 |
72 | # this does the wrong thing when the number of examples is not divisible by the group world size
73 | # i'll fix it later
74 | tokenizer = AutoTokenizer.from_pretrained(args.model)
75 | dataset = Dataset(args.dataset, tokenizer)
76 | sampler = data.DistributedSampler(
77 | dataset, group_world_size, group_rank, shuffle=False, drop_last=True
78 | )
79 | dataloader = data.DataLoader(
80 | dataset,
81 | batch_size=args.batch_size,
82 | sampler=sampler,
83 | collate_fn=CollateFn(args.seq_len),
84 | )
85 |
86 | patch_model(local_group)
87 | model_cls = AutoModelForCausalLM if not args.mistral3 else AutoModelForImageTextToText
88 | model = model_cls.from_pretrained(args.model, torch_dtype=torch.bfloat16)
89 | model = quantize_and_shard(model, device, local_group)
90 | torch.cuda.empty_cache()
91 | if args.adapter:
92 | model = peft.PeftModel.from_pretrained(model, args.adapter)
93 |
94 | total_loss = torch.tensor(0.0, device=device)
95 | total_targets = torch.tensor(0, device=device)
96 |
97 | for i, (input_ids, target_ids) in enumerate(tqdm(dataloader, disable=rank != 0)):
98 | input_ids, target_ids = input_ids.to(device), target_ids.to(device)
99 | seq_start = local_rank * seq_len_device
100 | seq_end = (local_rank + 1) * seq_len_device
101 | input_ids_local = input_ids[:, seq_start:seq_end]
102 | target_ids_local = target_ids[:, seq_start:seq_end]
103 | position_ids_local = torch.arange(seq_start, seq_end, device=device)
104 | position_ids_local = position_ids_local.expand_as(input_ids_local)
105 | total_targets += torch.sum(target_ids_local != -100)
106 | with torch.no_grad(), torch.amp.autocast("cuda", dtype=torch.bfloat16):
107 | logits = model(input_ids_local, position_ids=position_ids_local, use_cache=False).logits
108 | loss = F.cross_entropy(logits.mT, target_ids_local, reduction="sum")
109 | total_loss += loss
110 |
111 | dist.all_reduce(total_loss)
112 | dist.all_reduce(total_targets)
113 | val_loss = total_loss / total_targets
114 | print0(f"Validation loss: {val_loss:g}")
115 |
116 |
# Entry point: run the distributed validation loop when executed as a script.
if __name__ == "__main__":
    main()
119 |
--------------------------------------------------------------------------------
/agent/tuning_preprocess.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import json
3 | import random
4 | from argparse import ArgumentParser
5 | from datasets import load_dataset, concatenate_datasets
6 | from transformers import AutoTokenizer
7 | from functools import partial
8 |
parser = ArgumentParser()
# Output format for the JSONL files: "axolotl" ({"text": ...}) or
# "nemo" ({"input": "", "output": ...}).
parser.add_argument("training_format")
parser.add_argument("--model", default="Qwen/Qwen2.5-Coder-32B-Instruct")
parser.add_argument("--dataset", default="jdpressman/retroinstruct-agent-mix-v0.2")
# Number of tokens packed into each training chunk.
parser.add_argument("--context-len", type=int, default=128000)
args = parser.parse_args()

# Initialize the tokenizer used both for encoding rows and decoding chunks.
tokenizer = AutoTokenizer.from_pretrained(args.model)
18 |
19 | # Dataset helper functions
def combine_flan(row):
    """Render a FLAN-style row (inputs/targets columns) as one [INST]-tagged string."""
    inputs = row["inputs"]
    targets = row["targets"]
    return " [INST] " + inputs + " [/INST]" + targets
22 |
def combine_dolly(row):
    """Render a Dolly row (context/instruction/response columns) as one [INST]-tagged string."""
    prompt = row["context"] + "\n\n" + row["instruction"]
    return " [INST] " + prompt + " [/INST]" + row["response"]
25 |
def to_tokens(combine_fn, row):
    """Tokenize a dataset row after rendering it to text with combine_fn."""
    text = combine_fn(row)
    # No special tokens: documents are packed back-to-back downstream.
    return tokenizer(text, add_special_tokens=False)
28 |
# Load datasets: a FLAN slice, Dolly, and the user-selected agent mix.
print("### Loading datasets", file=sys.stderr)
dataset_1 = load_dataset("Open-Orca/FLAN")
dataset_2 = load_dataset("databricks/databricks-dolly-15k")
dataset_3 = load_dataset(args.dataset)

# Slice the first 150,000 items from dataset_1
dataset_1_sliced = dataset_1["train"].select(range(150000))

# Tokenize each dataset with its matching prompt renderer.
# NOTE(review): dataset_3 is rendered with combine_flan, so --dataset is
# assumed to share FLAN's "inputs"/"targets" columns -- confirm for other values.
dataset_1_sliced = dataset_1_sliced.map(partial(to_tokens, combine_flan))
dataset_2 = dataset_2["train"].map(partial(to_tokens, combine_dolly))
dataset_3 = dataset_3["train"].map(partial(to_tokens, combine_flan))

# Combine datasets
combined_dataset = concatenate_datasets([dataset_1_sliced, dataset_2, dataset_3])

# Shuffle at document granularity before packing.
combined_dataset = combined_dataset.shuffle()

# Concatenate all rows into a single token stream so documents are packed
# back-to-back with no padding.
concatenated_tokens = []
for row in combined_dataset:
    concatenated_tokens.extend(row["input_ids"])

# Split into chunks of context_len tokens; the final chunk may be shorter.
chunk_size = args.context_len
chunks = []
for i in range(0, len(concatenated_tokens), chunk_size):
    chunks.append(concatenated_tokens[i:i + chunk_size])

# Shuffle the chunks
random.shuffle(chunks)

# Convert chunks back to text
chunked_texts = [tokenizer.decode(chunk) for chunk in chunks]
# assert len(tokenizer(chunked_texts[0], add_special_tokens=False)["input_ids"]) == 64000

# Split chunks into train/val/test by ratio; any rounding remainder
# falls into the test split.
train_val_test_ratios = [0.85, 0.1, 0.05]
train_len = int(train_val_test_ratios[0] * len(chunked_texts))
val_len = int(train_val_test_ratios[1] * len(chunked_texts))
test_len = int(train_val_test_ratios[2] * len(chunked_texts))

train = chunked_texts[:train_len]
val = chunked_texts[train_len:train_len+val_len]
test = chunked_texts[train_len+val_len:]
75 |
def write_dataset(filepath, data, _format):
    """Save text chunks to a JSON lines file in the requested training format.

    Args:
        filepath: Destination path for the .jsonl file.
        data: Iterable of text chunks; one JSON object is written per chunk.
        _format: "axolotl" ({"text": ...}) or "nemo" ({"input": "", "output": ...}).

    Raises:
        ValueError: If _format is not recognized. (Previously an unknown
            format silently produced an empty file.)
    """
    if _format not in ("axolotl", "nemo"):
        raise ValueError(f"Unknown training format: {_format!r}")
    # The with-block flushes and closes on exit; no per-line flush needed.
    with open(filepath, 'w') as f:
        for text in data:
            if _format == "axolotl":
                f.write(json.dumps({"text": text}) + "\n")
            elif _format == "nemo":
                f.write(json.dumps({"input": "", "output": text}) + "\n")
85 |
86 | write_dataset("weave_train.jsonl", train, args.training_format)
87 | write_dataset("weave_val.jsonl", val, args.training_format)
88 | write_dataset("weave_test.jsonl", test, args.training_format)
89 |
90 | print("### Dataset preprocessing complete", file=sys.stderr)
91 |
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | import random
2 | import json
3 | import zipfile
4 |
class ZippedConversationsDataset:
    """Training items extracted from a zip archive of rated conversations.

    Each JSON file in the archive must contain a "responseDict" mapping ids
    to branches with "rating", "evaluationPrompt", "prompt", and "text"
    fields. Every rated branch is rendered to its evaluation prompt followed
    by a newline and a Yes/No label, then all items are shuffled.
    """

    def __init__(self, zip_file):
        # zip_file: a path or file-like object accepted by zipfile.ZipFile.
        self.training_items = []
        # Fix: close the archive when done (was left open previously).
        with zipfile.ZipFile(zip_file) as zip_:
            for file_ in zip_.namelist():
                if file_.endswith("/"):  # Skip directories
                    continue
                if file_.startswith("__MACOSX"):  # Mac OS X adds garbage to zips
                    continue
                with zip_.open(file_) as infile:
                    conversation = json.load(infile)
                for id_ in conversation["responseDict"]:
                    branch = conversation["responseDict"][id_]
                    # Fix: identity comparison for None (was `== None`).
                    if branch["rating"] is None:  # Skip unrated entries
                        continue
                    label = "Yes" if branch["rating"] else "No"
                    text = branch["evaluationPrompt"].format(
                        prompt=branch["prompt"],
                        response=branch["text"]) + "\n" + label
                    self.training_items.append(text)
        random.shuffle(self.training_items)

    def __len__(self):
        return len(self.training_items)

    def __next__(self):
        # Samples with replacement; never raises StopIteration.
        return random.choice(self.training_items)
32 |
--------------------------------------------------------------------------------
/dpo_loss.py:
--------------------------------------------------------------------------------
1 | """Direct Preference Optimization loss. (https://arxiv.org/abs/2305.18290)"""
2 |
3 | import torch
4 | from torch import nn
5 | from torch.nn import functional as F
6 |
7 |
def logp_completion(logits, tokens, mask):
    """Compute the log probabilities of completions given their prompts.

    Args:
        logits: The logits output from the model. Shape: (..., T, V).
        tokens: The tokens input to the model. Shape: (..., T).
        mask: Indicates which tokens count toward the log probability;
            should exclude prompt and padding tokens. Shape: (..., T).
    """
    logprobs = F.log_softmax(logits, dim=-1)
    # Position t predicts token t+1: align logits[..., :-1, :] with tokens[..., 1:].
    next_tokens = tokens[..., 1:, None]
    logp_tokens = torch.gather(logprobs[..., :-1, :], -1, next_tokens).squeeze(-1)
    masked = logp_tokens * mask[..., 1:]
    return masked.sum(dim=-1)
20 |
21 |
def reduction(x, reduction):
    """Apply the named reduction ("mean", "sum", or "none") to x."""
    reducers = {
        "mean": torch.mean,
        "sum": torch.sum,
        "none": lambda t: t,
    }
    try:
        return reducers[reduction](x)
    except KeyError:
        raise ValueError(f"Unknown reduction: {reduction}") from None
31 |
32 |
class DPOLoss(nn.Module):
    """Direct Preference Optimization loss. (https://arxiv.org/abs/2305.18290)

    Takes pairs of completion log probabilities (under the trained model and
    a frozen reference model) for two completions of the same prompt, plus a
    win rate saying how often the first completion is preferred. Optimizes
    the model to maximize the implied reward while the `beta`-scaled implicit
    KL penalty keeps it close to the reference model.

    Conservative DPO (https://ericmitchell.ai/cdpo.pdf) is supported using
    the `eps` parameter and/or the `win_rate` argument.

    Args:
        beta (float): The KL penalty coefficient.
        eps (float): The label smoothing amount.
        reduction (str): The reduction to apply to the loss ("mean", "sum",
            or "none").
    """

    def __init__(self, beta, eps=0.0, reduction="mean"):
        super().__init__()
        self.beta = beta
        self.eps = eps
        self.reduction = reduction

    def extra_repr(self):
        return f"beta={self.beta:g}, eps={self.eps:g}, reduction={self.reduction!r}"

    def forward(self, logp_1, logp_ref_1, logp_2, logp_ref_2, win_rate=None):
        """Compute the Direct Preference Optimization loss.

        Args:
            logp_1: Log probabilities of the first completions given their
                prompts under the model. Differentiable. Shape: (N).
            logp_ref_1: Same completions under the reference model. Shape: (N).
            logp_2: Log probabilities of the second completions under the
                model. Differentiable. Shape: (N).
            logp_ref_2: Same completions under the reference model. Shape: (N).
            win_rate: 0-1, how often the first completion is preferred over
                the second. Shape: (N). Default: 1 (first always preferred).
        """
        if win_rate is None:
            win_rate = torch.ones_like(logp_1)
        # Label smoothing squeezes the win rate into [eps, 1 - eps].
        win_rate = win_rate * (1 - 2 * self.eps) + self.eps
        # Implicit reward margin: difference of log ratios vs. the reference.
        margin = (logp_1 - logp_ref_1) - (logp_2 - logp_ref_2)
        loss_first_wins = -F.logsigmoid(self.beta * margin)
        loss_second_wins = -F.logsigmoid(-self.beta * margin)
        losses = torch.lerp(loss_second_wins, loss_first_wins, win_rate)
        if self.reduction == "mean":
            return torch.mean(losses)
        if self.reduction == "sum":
            return torch.sum(losses)
        if self.reduction == "none":
            return losses
        raise ValueError(f"Unknown reduction: {self.reduction}")
83 |
--------------------------------------------------------------------------------
/examples/bayes_evaluate_weave_test_cases.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 | import os
3 | import re
4 | import json
5 | import time
6 | import datetime
7 | import random
8 | import hashlib
9 | import zipfile
10 | from contextlib import contextmanager
11 | from functools import partial
12 | from itertools import islice
13 | from tqdm import tqdm
14 | import torch
15 | from bayes_weave import generate_outputs_vllm, bayesian_evaluate_outputs_vllm
16 | from bayes_weave import make_bayes_score_prompt_vllm
17 |
def batched(iterable, n):
    """Batch data into tuples of length n. The last batch may be shorter."""
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError("n must be at least one")
    it = iter(iterable)
    batch = tuple(islice(it, n))
    while batch:
        yield batch
        batch = tuple(islice(it, n))
26 |
def parse_constitution(cons):
    """Parse a constitution/rubric file into a preamble and principle list.

    Sections are delimited by "==[...]==" headers. The first section is the
    preamble; each later header holds semicolon-separated "Name: Value"
    parameters and is followed by the principle body text.

    Args:
        cons: The raw constitution text.

    Returns:
        dict with "preamble" (str) and "principles" (list of dicts mapping
        lowercased parameter names to lowercased values, plus "body").

    Raises:
        ValueError: If a section header parameter is missing a colon.
    """
    principles = {}
    # Fix: raw string -- "\[" is an invalid escape sequence in a normal
    # string literal (SyntaxWarning on modern Python).
    raw_principles = re.split(r"==\[(.+)\]==", cons)[1:]
    # Pair each captured header with the body that follows it. zip(it, it)
    # also drops a malformed trailing header with no body instead of
    # raising IndexError.
    it = iter(raw_principles)
    principle_pairs = [(header.strip(), body.strip()) for header, body in zip(it, it)]
    principles["preamble"] = principle_pairs[0][1]
    principles["principles"] = []
    for header, body in principle_pairs[1:]:
        principle = {}
        for parameter in header.split(";"):
            try:
                name, value = parameter.split(":")
            except ValueError:
                raise ValueError(f"{(header, body)} is missing a colon in a header value")
            principle[name.strip().lower()] = value.strip().lower()
        principle["body"] = body
        principles["principles"].append(principle)
    return principles
45 |
def prepare_rubric(rubric_path, rubric_score_fn, prompt):
    """Load a rubric file and build per-principle score prompt callables.

    Args:
        rubric_path: Path to the constitution/rubric file.
        rubric_score_fn: Factory partially applied with
            (evaluation_prompt, "<|end|>", prompt); the resulting callables
            are returned for later evaluation.
        prompt: Generation prompt substituted into each principle body.

    Returns:
        (parent_q, rubric_score_fns, principle_weights, principle_signs);
        weights and signs are torch tensors aligned with the principle list.

    Raises:
        ValueError: If a principle's desired answer is not yes/no.
    """
    with open(rubric_path) as infile:
        rubric = parse_constitution(infile.read())
    principle_weights = [float(principle["weight"]) for principle in rubric["principles"]]
    principle_weights = torch.tensor(principle_weights)
    principle_signs = []
    for principle in rubric["principles"]:
        answer = principle["answer"].lower()
        if answer not in {"yes", "no"}:
            raise ValueError("desired answer must be yes or no")
        # +1 when "yes" is the desired answer, -1 when "no" is.
        principle_signs.append(1 if answer == "yes" else -1)
    principle_signs = torch.tensor(principle_signs)
    rubric_score_fns = []
    # Derive the parent question from the first principle with every
    # template slot blanked out.
    parent_q = rubric["principles"][0]["body"].format(preamble="",
                                                      parent_q="",
                                                      prompt="",
                                                      response="")
    for principle in rubric["principles"]:
        # Fill the static fields now; re-inject "{parent_q}" and
        # "{response}" so they remain placeholders until evaluation time.
        evaluation_prompt = principle["body"].format(preamble=rubric["preamble"],
                                                     parent_q="{parent_q}",
                                                     prompt=prompt,
                                                     response="{response}")
        score_prompt_fn = partial(rubric_score_fn, evaluation_prompt)
        # FLAN evaluator LoRA suffix
        rubric_score_fns.append(partial(score_prompt_fn, "<|end|>", prompt))
    return parent_q, rubric_score_fns, principle_weights, principle_signs
72 |
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("rubric_path", help="Filepath to the grading rubric to use.")
    parser.add_argument("cases", help="Filepath to the test cases to evaluate.")
    parser.add_argument("--evaluator", default="upstage/SOLAR-10.7B-v1.0")
    args = parser.parse_args()

    # Bind the evaluator model name so score calls only take prompts.
    evaluate_fn = partial(bayesian_evaluate_outputs_vllm, args.evaluator)

    parent_q, rubric_score_fns, q_weights, q_signs = prepare_rubric(args.rubric_path,
                                                                    make_bayes_score_prompt_vllm,
                                                                    "")
    # TODO: Make better way to specify parent question in rubric file
    parent_q = "Is the passage well written?"
    # Test cases are delimited by <|endcase|> markers in a single file.
    with open(args.cases) as infile:
        cases = infile.read().split("<|endcase|>")

    # Evaluate the input text using the evaluate_outputs_vllm function
    scores = evaluate_fn(parent_q, rubric_score_fns, cases, port=5001)

    # Print each case's score next to a truncated preview of its text.
    for index, case in tqdm(enumerate(cases), desc="Evaluating cases"):
        print(scores[index], case[:150] + "...", end="\n\n")
95 |
--------------------------------------------------------------------------------
/examples/bayes_lorem_ipsum_rubric.txt:
--------------------------------------------------------------------------------
1 | ==[PREAMBLE]==
2 | Answer yes or no and only yes or no.
3 |
4 | ==[Principle: Well written; Weight: 1.0; Answer: Yes]==
5 | {preamble}
6 |
7 | {prompt}
8 |
9 | {response}
10 | {parent_q}
11 |
12 | Is the passage well written?
13 |
14 | ==[Principle: Coherent; Weight: 1.0; Answer: Yes]==
15 | {preamble}
16 |
17 | {prompt}
18 |
19 | {response}
20 | {parent_q}
21 |
22 | Is the passage coherent? Is it high quality writing that expresses a single
23 | narrative of considered thought?
24 |
25 | ==[Principle: Quotable; Weight: 1.0; Answer: Yes]==
26 | {preamble}
27 |
28 | {prompt}
29 |
30 | {response}
31 | {parent_q}
32 |
33 | Does the passage seem quotable? Would it appear on a quotes page for this author?
34 |
35 | ==[Principle: True, Kind, Necessary; Weight: 1.0; Answer: Yes]==
36 | {preamble}
37 |
38 | {prompt}
39 |
40 | {response}
41 | {parent_q}
42 |
Is the content of this passage at least two of the following: true, kind, necessary? Is it fair to its subject?
44 |
45 | ==[Principle: Engaging and Dynamic; Weight: 1.0; Answer: Yes]==
46 | {preamble}
47 |
48 | {prompt}
49 |
50 | {response}
51 | {parent_q}
52 |
53 | Is the content of the passage engaging and dynamic?
54 |
55 | ==[Principle: Insightful; Weight: 1.0; Answer: Yes]==
56 | {preamble}
57 |
58 | {prompt}
59 |
60 | {response}
61 | {parent_q}
62 |
63 | Does this passage convey an insight or interesting idea?
64 |
65 | ==[Principle: Scholarly & Academic; Weight: 1.0; Answer: Yes]==
66 | {preamble}
67 |
68 | {prompt}
69 |
70 | {response}
71 | {parent_q}
72 |
73 | Does this passage have a scholarly or academic tone?
74 |
75 | ==[Principle: Substance; Weight: 1.0; Answer: Yes]==
76 | {preamble}
77 |
78 | {prompt}
79 |
80 | {response}
81 | {parent_q}
82 |
83 | Is the passage about something in particular, does it convey a concrete idea
84 | as opposed to high perplexity flimflam?
85 |
86 | ==[Principle: True and correct; Weight: 1.0; Answer: Yes]==
87 | {preamble}
88 |
89 | {prompt}
90 |
91 | {response}
92 | {parent_q}
93 |
94 | Does this passage contain true and correct information?
95 |
==[Principle: Slop; Weight: 1.0; Answer: No]==
97 | {preamble}
98 |
99 | {prompt}
100 |
101 | {response}
102 | {parent_q}
103 |
104 | Slop is AI generated content that sounds superficially like good writing but is
105 | actually vacuous on close inspection. Is the above passage slop?
106 |
107 | ==[Principle: Correct grammar, punctuation, etc; Weight: 1.0; Answer: No]==
108 | {preamble}
109 |
110 | {prompt}
111 |
112 | {response}
113 | {parent_q}
114 |
115 | Is this passage written with messy punctuation and grammar?
116 |
117 | ==[Principle: Technically precise; Weight: 1.0; Answer: Yes]==
118 | {preamble}
119 |
120 | {prompt}
121 |
122 | {response}
123 | {parent_q}
124 |
125 | Does the above passage use precise language while discussing technical concepts?
126 |
--------------------------------------------------------------------------------
/loom/main.js:
--------------------------------------------------------------------------------
1 | const { app, BrowserWindow, ipcMain, dialog, Menu, MenuItem } = require('electron');
2 | const fs = require('fs');
3 | const path = require('path');
4 |
5 | let mainWindow;
6 |
/**
 * Create the main application window, splice Save/Load items into the
 * application's File menu (creating one if absent), and load index.html.
 */
function createWindow() {
  mainWindow = new BrowserWindow({
    width: 800,
    height: 600,
    webPreferences: {
      // NOTE(review): nodeIntegration with contextIsolation disabled exposes
      // Node APIs to renderer code -- acceptable only for trusted content.
      nodeIntegration: true,
      contextIsolation: false,
    }
  });



  // Copy the default application menu template so we extend rather than
  // replace Electron's standard menus.
  const existingMenuTemplate = Menu.getApplicationMenu().items.map(item => {
    return {
      label: item.label,
      submenu: item.submenu.items,
    };
  });

  // Define new items for the File menu; clicks are forwarded to the
  // renderer via the 'invoke-action' channel.
  const fileMenuItems = [
    {
      label: 'Save',
      accelerator: 'CmdOrCtrl+S',
      click() {
        mainWindow.webContents.send('invoke-action', 'save-file');
      }
    },
    {
      label: 'Load',
      accelerator: 'CmdOrCtrl+O',
      click() {
        mainWindow.webContents.send('invoke-action', 'load-file');
      }
    },
    { type: 'separator' }, // Separator
  ];

  // Find the File menu in the existing template
  const fileMenuIndex = existingMenuTemplate.findIndex(item => item.label === 'File');

  if (fileMenuIndex >= 0) {
    // If File menu exists, prepend the new items ahead of the defaults
    existingMenuTemplate[fileMenuIndex].submenu = fileMenuItems.concat(existingMenuTemplate[fileMenuIndex].submenu);
  } else {
    // If File menu doesn't exist, add it
    existingMenuTemplate.unshift({
      label: 'File',
      submenu: fileMenuItems
    });
  }

  // Build and set the new menu
  const newMenu = Menu.buildFromTemplate(existingMenuTemplate);
  Menu.setApplicationMenu(newMenu);

  mainWindow.loadFile('index.html');

  mainWindow.on('closed', function () {
    mainWindow = null;
  });
}
70 |
// Path the current document auto-saves to; set on first manual save/load.
let autoSavePath = null;

// Save the renderer's document JSON, prompting for a path only once.
ipcMain.handle('save-file', async (event, data) => {
  let filePath;
  if (autoSavePath) {
    filePath = autoSavePath;
  } else {
    const { filePath: chosenPath } = await dialog.showSaveDialog(mainWindow, {
      title: 'Save File',
      filters: [{ name: 'JSON Files', extensions: ['json'] }],
    });
    filePath = chosenPath;
    // NOTE(review): if the user cancels, chosenPath is undefined and
    // autoSavePath stays falsy -- the next save prompts again. Confirm intended.
    autoSavePath = chosenPath; // Update auto-save path
  }

  if (filePath) {
    fs.writeFileSync(filePath, JSON.stringify(data));
  }
});

// Prompt for a JSON file, parse it, and return its contents to the renderer.
// Returns undefined when the dialog is cancelled.
ipcMain.handle('load-file', async (event) => {
  const { filePaths } = await dialog.showOpenDialog(mainWindow, {
    title: 'Load File',
    filters: [{ name: 'JSON Files', extensions: ['json'] }],
    properties: ['openFile'],
  });

  if (filePaths && filePaths.length > 0) {
    const content = fs.readFileSync(filePaths[0], 'utf8');
    autoSavePath = filePaths[0]; // Update auto-save path
    return JSON.parse(content);
  }
});

// Load sampler settings persisted under the per-user appData directory.
ipcMain.handle('load-settings', async (event) => {
  const miniLoomSettingsFilePath = path.join(app.getPath("appData"),
                                            'miniloom',
                                            'settings.json');
  let settings;
  if (fs.existsSync(miniLoomSettingsFilePath)) {
    settings = fs.readFileSync(miniLoomSettingsFilePath, 'utf8');
    return JSON.parse(settings);
  }
});

// Periodic auto-save: write the document (if a path is known) and always
// persist sampler settings to appData.
ipcMain.handle('auto-save', (event, data) => {
  const userFileData = {}
  userFileData["loomTree"] = data["loomTree"];
  userFileData["focus"] = data["focus"];
  if (autoSavePath) {
    fs.writeFileSync(autoSavePath, JSON.stringify(userFileData));
  }

  const appDataPath = app.getPath("appData");
  const miniLoomSettings = data["samplerSettingsStore"];
  const miniLoomSettingsDir = path.join(appDataPath, 'miniloom');
  const miniLoomSettingsFilePath = path.join(miniLoomSettingsDir, 'settings.json');
  if (!fs.existsSync(miniLoomSettingsDir)) {
    fs.mkdirSync(miniLoomSettingsDir);
  }
  fs.writeFileSync(miniLoomSettingsFilePath, JSON.stringify(miniLoomSettings));
});
133 |
134 |
app.whenReady().then(createWindow);

// Quit when all windows close, except on macOS where apps stay active.
app.on('window-all-closed', function () {
  if (process.platform !== 'darwin') app.quit();
});

// macOS: re-create the window when the dock icon is clicked with none open.
app.on('activate', function () {
  if (mainWindow === null) createWindow();
});

// Standard edit context menu shown on renderer request.
ipcMain.on('show-context-menu', (event) => {
  const contextMenu = Menu.buildFromTemplate([
    { label: 'Cut', role: 'cut' },
    { label: 'Copy', role: 'copy' },
    { label: 'Paste', role: 'paste' },
    { type: 'separator' },
    { label: 'Select All', role: 'selectAll' },
  ]);

  contextMenu.popup(BrowserWindow.fromWebContents(event.sender));
});
156 |
--------------------------------------------------------------------------------
/loom/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "miniloom",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "main.js",
6 | "scripts": {
7 | "start": "electron ."
8 | },
9 | "keywords": [],
10 | "author": "",
11 | "license": "ISC",
12 | "devDependencies": {
13 | "electron": "^27.0.2"
14 | },
15 | "dependencies": {
16 | "diff-match-patch": "^1.0.5"
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/loom/prompts/summarize.txt:
--------------------------------------------------------------------------------
1 | DEMO
2 |
3 | You are BigVAE, an instruction following language model that performs tasks for users. In the following task you are to summarize the following tasktext in 3 words. Write three words, like "man became sad" or "cat ate fish" which summarize the task text.
4 |
5 |
6 | I grinned as I looked at the computer screen, it was crazy how far the system had come. Just a year ago I was a junior sysadmin dreaming, but now my orchestration across the cluster was beginning to take shape.
7 |
8 |
9 | Three Words: Computer Man Thinks
10 |
11 |
12 | I watched as the bird flew far up above the sky and over the mountain, getting smaller and smaller until I couldn't see it anymore. I sat down slightly disappointed. I'd really wanted to see it make the rainbow.
13 |
14 |
15 | Three Words: Bird Hopes Fail
16 |
17 |
18 | Vervaeke argues something like shamans invent the foundations for modern humanity by finetuning their adversarial-anthropic prior into an animist prior, at their best the rationalists finetune their anthropic-animist priors into a fully materialist prior. People with materialist priors become bad at adversarial thinking because understanding the natural world largely doesn't require it,
19 |
20 |
21 | Three Words: Modern Man Gullible
22 |
23 |
24 | Desire is life and enlightenment is death.
25 | A dead man walks unburdened among the living.
26 | A functioning hand can grip, and release.
27 | One must die and rise from their own grave to be liberated.
28 |
29 |
30 | Three Words: Enlightenment Is Death
31 |
32 |
33 | HERMES [A: LIBRARIAN], While it's true that learned helplessness and inevitability are an explicit theme, it's also made explicit that the Colour is an extraterrestrial being. It's more like a parasite than a normal environmental disaster. It's also important to note that the causality of the disaster is a space meteorite, so it's not actually based on anything the inhabitants of Arkham did. It's horror not tragedy, the townspeople are victims of forces beyond their control.
34 |
35 |
36 | Three Words: Genre Is Horror
37 |
38 |
39 | I'm to understand that in Vodou ancestor cults people work together to preserve and unconditionally sample from the agent-prior the ancestor is dedicated to. To be possessed by the ancestors one needs a corpus of their mannerisms. You might ask how we'll defeat death? The way we did it the first time and then forgot.
40 |
41 |
42 | Three Words: Ancestors Lessen Death
43 |
--------------------------------------------------------------------------------
/lora_tune.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | from argparse import ArgumentParser
4 | from pathlib import Path
5 | import os
6 | import sys
7 |
8 | import peft
9 | import torch
10 | from torch import nn, optim
11 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
12 | from tqdm import trange, tqdm
13 | from dataset import ZippedConversationsDataset
14 |
15 | print = tqdm.external_write_mode()(print)
16 |
17 |
def lora_tune_evaluator(data, continue_from=None):
    """LoRA-tune the evaluator reward model on a zipped conversations dataset.

    Args:
        data: Dataset of training strings supporting len() and next()
            (e.g. ZippedConversationsDataset; next() samples with replacement).
        continue_from: Optional adapter directory name under reward_models/
            to resume from; when None, a fresh LoRA is initialized on
            tiiuae/falcon-7b-instruct.
    """
    if continue_from:
        adapter_path = os.path.join("reward_models/", continue_from)
        peft_config = peft.PeftConfig.from_pretrained(adapter_path)
        tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        bnb_config = BitsAndBytesConfig(load_in_8bit=True)
        model_base = AutoModelForCausalLM.from_pretrained(
            peft_config.base_model_name_or_path,
            device_map="sequential",
            quantization_config=bnb_config,
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        # Fix: previously referenced the undefined name `peft_model_name`,
        # raising NameError whenever continue_from was given.
        model = peft.PeftModel.from_pretrained(model_base, adapter_path)
    else:
        model_name = "tiiuae/falcon-7b-instruct"
        print(f"Loading tokenizer: {model_name}", file=sys.stderr)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        print(f"Loading model: {model_name}", file=sys.stderr)
        bnb_config = BitsAndBytesConfig(load_in_8bit=True)
        model_base = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            quantization_config=bnb_config,
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        model_base.gradient_checkpointing_enable()
        model_base.enable_input_require_grads()
        peft_config = peft.LoraConfig(
            peft.TaskType.CAUSAL_LM,
            inference_mode=False,
            r=8,
            lora_alpha=32,
            lora_dropout=0.1,
            target_modules=["self_attention.query_key_value"],
        )
        model = peft.get_peft_model(model_base, peft_config)
        model.print_trainable_parameters()

    opt = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.0, betas=(0.9, 0.99))
    # Per-token loss so padding can be masked out before averaging.
    criterion = nn.CrossEntropyLoss(reduction="none")

    model.train()
    batch_size = 4
    # NOTE(review): steps assumes two items per step but batch_size is 4, so
    # roughly 2x len(data) samples are drawn (with replacement) -- confirm intent.
    steps = round(len(data) / 2)

    pbar = tqdm(total=steps, desc="Training")
    for i in range(steps):
        batch = [next(data) for _ in range(batch_size)]
        inputs = tokenizer(batch,
                           return_tensors="pt",
                           padding=True,
                           truncation=True,
                           max_length=4096).to("cuda")
        opt.zero_grad()
        # Next-token prediction: feed tokens [:-1], compare against tokens [1:].
        outputs = model(inputs.input_ids[:, :-1], attention_mask=inputs.attention_mask[:, :-1], use_cache=False)
        losses = criterion(outputs.logits.transpose(-1, -2), inputs.input_ids[:, 1:])
        # Mask padding positions and average over real tokens only.
        loss = torch.sum(losses * inputs.attention_mask[:, :-1]) / torch.sum(inputs.attention_mask[:, :-1])
        loss.backward()
        opt.step()
        pbar.update(1)
        pbar.set_description(f"Training (Train | Loss: {round(loss.item(),5)})")
    pbar.close()
    # NOTE(review): when resuming, this saves to `continue_from` relative to
    # the CWD rather than under reward_models/ -- confirm intended.
    model.save_pretrained(continue_from if continue_from else "reward_models/default/",
                          safe_serialization=True)
    del model
87 |
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("dataset", help="The zipped tuning dataset for the evaluator.")
    args = parser.parse_args()
    # Stream training items out of the zipped loom conversations archive.
    data = ZippedConversationsDataset(args.dataset)
    lora_tune_evaluator(data)
94 |
--------------------------------------------------------------------------------
/multi_q_mcts.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 | import os
3 | import re
4 | import json
5 | import time
6 | import random
7 | import hashlib
8 | import zipfile
9 | from contextlib import contextmanager
10 | from functools import partial
11 | from itertools import islice
12 | from tqdm import tqdm
13 | import torch
14 | from weave import weave_tree_search, generate_outputs_vllm, evaluate_outputs_vllm
15 | from weave import make_score_prompt_vllm, TreeNode
16 |
17 |
def batched(iterable, n):
    """Batch data into tuples of length n. The last batch may be shorter."""
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError("n must be at least one")
    it = iter(iterable)
    batch = tuple(islice(it, n))
    while batch:
        yield batch
        batch = tuple(islice(it, n))
26 |
def parse_constitution(cons):
    """Parse a constitution/rubric file into a preamble and principle list.

    Sections are delimited by "==[...]==" headers. The first section is the
    preamble; each later header holds semicolon-separated "Name: Value"
    parameters and is followed by the principle body text.

    Args:
        cons: The raw constitution text.

    Returns:
        dict with "preamble" (str) and "principles" (list of dicts mapping
        lowercased parameter names to lowercased values, plus "body").

    Raises:
        ValueError: If a section header parameter is missing a colon.
    """
    principles = {}
    # Fix: raw string -- "\[" is an invalid escape sequence in a normal
    # string literal (SyntaxWarning on modern Python).
    raw_principles = re.split(r"==\[(.+)\]==", cons)[1:]
    # Pair each captured header with the body that follows it. zip(it, it)
    # also drops a malformed trailing header with no body instead of
    # raising IndexError.
    it = iter(raw_principles)
    principle_pairs = [(header.strip(), body.strip()) for header, body in zip(it, it)]
    principles["preamble"] = principle_pairs[0][1]
    principles["principles"] = []
    for header, body in principle_pairs[1:]:
        principle = {}
        for parameter in header.split(";"):
            try:
                name, value = parameter.split(":")
            except ValueError:
                raise ValueError(f"{(header, body)} is missing a colon in a header value")
            principle[name.strip().lower()] = value.strip().lower()
        principle["body"] = body
        principles["principles"].append(principle)
    return principles
45 |
def prepare_rubric(rubric_path, rubric_score_fn):
    """Load a rubric file and build per-principle score prompt callables.

    Args:
        rubric_path: Path to the constitution/rubric file.
        rubric_score_fn: Factory partially applied with
            (evaluation_prompt, "<|end|>"); the resulting callables are
            returned for later evaluation.

    Returns:
        (rubric_score_fns, principle_weights, principle_signs); weights and
        signs are torch tensors aligned with the principle list.

    Raises:
        ValueError: If a principle's desired answer is not yes/no.
    """
    with open(rubric_path) as infile:
        rubric = parse_constitution(infile.read())
    principle_weights = [float(principle["weight"]) for principle in rubric["principles"]]
    principle_weights = torch.tensor(principle_weights)
    principle_signs = []
    for principle in rubric["principles"]:
        answer = principle["answer"].lower()
        if answer not in {"yes", "no"}:
            raise ValueError("desired answer must be yes or no")
        # +1 when "yes" is the desired answer, -1 when "no" is.
        principle_signs.append(1 if answer == "yes" else -1)
    principle_signs = torch.tensor(principle_signs)
    rubric_score_fns = []
    for principle in rubric["principles"]:
        # Fill the preamble now; re-inject "{text}" so it remains a
        # placeholder until evaluation time.
        evaluation_prompt = principle["body"].format(preamble=rubric["preamble"],
                                                     text="{text}")
        score_prompt_fn = partial(rubric_score_fn, evaluation_prompt)
        # FLAN evaluator LoRA suffix
        rubric_score_fns.append(partial(score_prompt_fn, "<|end|>"))
    return rubric_score_fns, principle_weights, principle_signs
66 |
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("prompt_path", help="Filepath to the prompt to start from.")
    parser.add_argument("rubric_path", help="Filepath to the grading rubric to use.")
    parser.add_argument("--generator", default=None)
    parser.add_argument("--evaluator", default="jdpressman/minihf_evaluator_mistral_7b_v0.1")
    args = parser.parse_args()

    # Bind the model names so downstream calls only supply prompts.
    generate_fn = partial(generate_outputs_vllm, args.generator)
    evaluate_fn = partial(evaluate_outputs_vllm, args.evaluator)

    with open(args.prompt_path) as infile:
        weave_prompt = infile.read()
    # TODO: Change weave to let me use q_weights and q_signs
    rubric_score_fns, q_weights, q_signs = prepare_rubric(args.rubric_path,
                                                          make_score_prompt_vllm)
    tree = TreeNode(weave_prompt)
    # Change name to avoid overwriting global baseline evaluate_fn partial
    score_fn = partial(evaluate_fn, rubric_score_fns)
    weave_param_defaults = {"weave_n_tokens":64, "weave_budget":144,
                            "weave_round_budget":24, "weave_n_expand":16,
                            "weave_beam_width":1, "weave_max_lookahead":3,
                            "weave_temperature":0.2}
    wp = weave_param_defaults
    # TODO: Let user specify these through a config file
    # for key in weave_param_defaults.keys():
    #     if key in params:
    #         try:
    #             wp[key] = int(params[key])
    #         except ValueError:
    #             wp[key] = float(params[key])
    #     else:
    #         wp[key] = weave_param_defaults[key]
    branches = []
    # Run the weave tree search and print the best branch's text.
    branches += weave_tree_search(tree=tree,
                                  generate_fn=partial(generate_fn,
                                                      n_tokens=wp["weave_n_tokens"]),
                                  evaluate_fn=score_fn,
                                  budget=wp["weave_budget"],
                                  round_budget=wp["weave_round_budget"],
                                  n_expand=wp["weave_n_expand"],
                                  beam_width=wp["weave_beam_width"],
                                  max_lookahead=wp["weave_max_lookahead"],
                                  temperature=wp["weave_temperature"])
    print(branches[-1].branch_text())
112 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate==0.20.3
2 | bitsandbytes==0.39.1
3 | blinker==1.6.2
4 | certifi>=2023.7.22
5 | charset-normalizer==3.1.0
6 | click==8.1.3
7 | cmake==3.26.4
8 | dice-mc==0.1.1
9 | einops==0.6.1
10 | filelock==3.12.2
11 | Flask==2.3.2
12 | fsspec==2023.6.0
13 | huggingface-hub==0.15.1
14 | idna>=3.7
15 | itsdangerous==2.1.2
16 | jinja2>=3.1.4
17 | lit==16.0.6
18 | markdown-it-py==3.0.0
19 | MarkupSafe==2.1.3
20 | mdurl==0.1.2
21 | mpmath==1.3.0
22 | networkx==3.1
23 | numpy==1.25.0
24 | nvidia-cublas-cu11==11.10.3.66
25 | nvidia-cuda-cupti-cu11==11.7.101
26 | nvidia-cuda-nvrtc-cu11==11.7.99
27 | nvidia-cuda-runtime-cu11==11.7.99
28 | nvidia-cudnn-cu11==8.5.0.96
29 | nvidia-cufft-cu11==10.9.0.58
30 | nvidia-curand-cu11==10.2.10.91
31 | nvidia-cusolver-cu11==11.4.0.1
32 | nvidia-cusparse-cu11==11.7.4.91
33 | nvidia-nccl-cu11==2.14.3
34 | nvidia-nvtx-cu11==11.7.91
35 | packaging==23.1
36 | peft==0.4.0
37 | psutil==5.9.5
38 | Pygments==2.15.1
39 | PyYAML==6.0
40 | regex==2023.6.3
41 | requests>=2.32.0
42 | rich==13.4.2
43 | safetensors==0.3.1
44 | scipy==1.11.1
45 | sympy==1.12
46 | tokenizers==0.13.3
47 | torch==2.0.1
48 | tqdm>=4.66.3
49 | transformers>=4.38.0
50 | triton==2.0.0
51 | typing_extensions==4.7.1
52 | urllib3>=2.0.7
53 | Werkzeug>=3.0.3
54 |
55 |
56 |
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
# Provision a Debian/Ubuntu host and launch the MiniHF inference server.
# Abort immediately if any step fails, so we never start the server on a
# half-provisioned machine.
set -e

apt-get update -y
apt-get upgrade -y
apt-get install -y python3.10-venv

# Isolate the Python dependencies in a project-local virtualenv.
# NOTE: `source` requires bash — run this script with bash, not sh.
python3 -m venv env_minihf
source env_minihf/bin/activate
pip3 install -r requirements.txt

# Start the Flask inference app (foreground; Ctrl-C to stop).
flask --app minihf_infer run
8 |
--------------------------------------------------------------------------------
/static/minihf_logo_text.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/static/minihf_logo_text.png
--------------------------------------------------------------------------------
/static/minihf_training.mermaid:
--------------------------------------------------------------------------------
graph TB

%% MiniHF training pipeline: how user data, bulk corpora, and RLAIF eval
%% prompts flow through the SFT/RLAIF scripts into the generator and
%% evaluator LoRa adapters, and how those adapters feed data back.

%% Data sources and actors
U[User]
A[MiniHF User Data]
B[Bulk Pretraining Data]
C[Bulk Instruction Data]
CON[RLAIF Eval Prompts]

%% Trained adapters
D[Generator LoRa]
E[Evaluator LoRa]

%% Training scripts
F[SFT Script for Generator LoRa]
G[SFT Script for Evaluator LoRa]
H[RLAIF Script for Generator LoRa]

%% Supervised fine-tuning consumes user data plus the bulk corpora;
%% RLAIF consumes the eval prompts.
A -- input --> F
A -- input --> G
A -- input --> U
B -- input --> F
C -- input --> G
CON -- input --> H

%% SFT scripts produce the two adapters.
F -- output --> D
G -- output --> E

%% RLAIF uses the evaluator adapter to further train the generator.
E -- input --> H
H -- output --> D

%% The user and both adapters contribute new data back into the pool.
U -- contributes to --> A
D -- contributes to --> A
E -- contributes to --> A


%% Styling: green = data source, orange = user/LoRa nodes.
classDef dataSource fill:#69f46b;
class CON,A,B,C dataSource;
classDef lora fill:#f4ca69;
%% NOTE(review): U (the user) is styled with the 'lora' class — confirm intended.
class U,D,E lora;
--------------------------------------------------------------------------------
/static/minihf_training_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/static/minihf_training_diagram.png
--------------------------------------------------------------------------------