├── .gitignore
├── LICENSE
├── README.md
├── agent
├── Dockerfile
├── README.md
├── backtrack_stems.txt
├── block_generators.py
├── block_linters.py
├── bootstraps
│ ├── agent_django_interface_bootstrap.py
│ ├── bootstrap.py
│ ├── browser_tool_bootstrap.py
│ ├── discord_bootstrap.py
│ ├── discord_bootstrap_jdp.py
│ ├── dome39_scifi_bootstrap.py
│ ├── example_blocks
│ │ ├── discord_evaluation1.json
│ │ ├── discord_evaluation1.py
│ │ ├── discord_observation_inference1.json
│ │ ├── discord_observation_inference1.py
│ │ ├── discord_observation_inference2.json
│ │ ├── discord_observation_inference2.py
│ │ ├── discord_orientation1.json
│ │ ├── discord_orientation1.py
│ │ ├── discord_orientation2.json
│ │ ├── discord_orientation2.py
│ │ ├── haunted_mansion_orientation1.json
│ │ ├── haunted_mansion_orientation1.py
│ │ ├── nano_editor1.json
│ │ ├── nano_editor1.py
│ │ ├── nano_editor_action.json
│ │ ├── nano_editor_action.py
│ │ ├── nethack_action1.json
│ │ ├── nethack_action1.py
│ │ ├── nethack_action2.json
│ │ ├── nethack_action2.py
│ │ ├── nethack_action3.json
│ │ ├── nethack_action3.py
│ │ ├── nethack_action4.json
│ │ ├── nethack_action4.py
│ │ ├── nethack_action5.json
│ │ ├── nethack_action5.py
│ │ ├── nethack_check_inventory.json
│ │ ├── nethack_check_inventory.py
│ │ ├── nethack_evaluation1.json
│ │ ├── nethack_evaluation1.py
│ │ ├── nethack_orientation1.json
│ │ ├── nethack_orientation1.py
│ │ ├── nethack_orientation2.json
│ │ ├── nethack_orientation2.py
│ │ ├── nethack_orientation3.json
│ │ ├── nethack_orientation3.py
│ │ ├── nethack_pick_up_item.json
│ │ ├── nethack_pick_up_item.py
│ │ ├── rabbit_hole_refocus1.json
│ │ ├── rabbit_hole_refocus1.py
│ │ ├── send_discord_message.json
│ │ ├── send_discord_message.py
│ │ ├── send_discord_message2.json
│ │ ├── send_discord_message2.py
│ │ ├── send_discord_message3.json
│ │ ├── send_discord_message3.py
│ │ ├── weave_editor_append.json
│ │ ├── weave_editor_append.py
│ │ ├── weave_editor_diff_edit.json
│ │ └── weave_editor_diff_edit.py
│ ├── haunted_mansion_bootstrap.py
│ ├── haunted_mansion_bootstrap_emacs.py
│ ├── haunted_mansion_bootstrap_nano.py
│ ├── nano_exercises
│ │ └── prompt.txt
│ ├── nano_exercises_bootstrap.py
│ ├── nano_practice_bootstrap.py
│ ├── nano_practice_bootstrap_easy.py
│ ├── nethack_bootstrap.py
│ ├── nethack_bootstrap_rooms.py
│ ├── nethack_bootstrap_rooms2.py
│ ├── nethack_discord_bootstrap.py
│ ├── pure_ocaml_bootstrap.py
│ ├── simple_convo_bootstrap.py
│ ├── sir_rowland_hill_bootstrap.py
│ ├── skim_retroinstruct_data_guide.py
│ ├── test_tictactoe_server.py
│ ├── test_vigenere.py
│ ├── tictactoe_bootstrap.py
│ ├── tictactoe_server.py
│ ├── tictactoe_single_bootstrap.py
│ ├── tictactoe_single_discord_bootstrap.py
│ ├── vigenere.py
│ ├── vigenere_bootstrap.py
│ ├── vigenere_bootstrap_nano.py
│ ├── wiki_citation_bootstrap.py
│ └── zombie_game_bootstrap.py
├── cache_hf.py
├── docs
│ ├── 2024-12-29-how-to-write-a-bootstrap-file.md
│ └── weave_agent_vs_react.png
├── embed_server.py
├── error_stems.txt
├── eval_rubrics
│ ├── action.txt
│ ├── backtrack.txt
│ ├── debug.txt
│ ├── evaluation.txt
│ ├── expectation.txt
│ ├── observation-inference.txt
│ ├── orientation.txt
│ └── task-inference.txt
├── long_backtrack_stems.txt
├── planner.py
├── prepare_rl_set_from_traces.py
├── python.lark
├── render_agent_trace.py
├── render_block.py
├── reproduce_vllm_bug_partial_utf8.py
├── requirements.txt
├── retrieval.py
├── run_without_errors_questions.txt
├── scratch.txt
├── sleep.py
├── templates
│ ├── describe1.txt
│ └── describe2.txt
├── test_block_extractors.py
├── test_block_linters.py
├── test_rl_reward_penalties.py
├── test_weave_kanban.py
├── tools
│ ├── atspi_desktop.py
│ ├── discord.py
│ ├── editor.py
│ ├── editor_test.txt
│ ├── emacs.py
│ ├── fuzz1.txt
│ ├── nano.py
│ ├── nethack.py
│ ├── test_diffs.py
│ ├── weave_zombie_game.py
│ └── zombie_game.py
├── trainer
│ ├── linear_4bit_sharded.py
│ ├── linear_4bit_sharded_test.py
│ ├── mistral3_ring_attn_test.py
│ ├── mixtral_ring_attn_test.py
│ ├── patch_model.py
│ ├── qwen2_ring_attn_test.py
│ ├── ring_attn.py
│ ├── ring_attn_test.py
│ ├── trainer.py
│ ├── trainer_preference.py
│ └── trainer_val_loss.py
├── tuning_preprocess.py
├── weave.py
└── weave_agent.py
├── dataset.py
├── diff_match_patch.py
├── dpo_loss.py
├── examples
├── bayes_evaluate_weave_test_cases.py
├── bayes_lorem_ipsum_rubric.txt
└── lorem_ipsum_cases.txt
├── find_ae_scale.py
├── hermes
├── hermes_constitution.txt
└── hermes_prompts.txt
├── loom
├── index.html
├── main.js
├── package.json
├── prompts
│ ├── rewrite.txt
│ └── summarize.txt
├── prose_samples.json
└── renderer.js
├── loom_tree.py
├── lora_tune.py
├── minihf_infer.py
├── multi_q_mcts.py
├── requirements.txt
├── rlaif_generator.py
├── rlaif_generator_dpo.py
├── setup.sh
├── sft_evaluator.py
├── sft_generator.py
├── static
├── minihf.html
├── minihf_logo_text.png
├── minihf_training.mermaid
└── minihf_training_diagram.png
├── train_vae_overlap.py
├── train_vae_router.py
├── vae_infer.py
└── weave.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *~
3 |
--------------------------------------------------------------------------------
/agent/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use an official Python runtime as a parent image
2 | FROM python:3.10-slim
3 |
4 | # Set the working directory in the container
5 | WORKDIR /app
6 | RUN mkdir /app/weave-agent-logs/
7 |
8 | # Copy only the requirements.txt file
9 | COPY requirements.txt /app/
10 |
11 | # Install any needed packages specified in requirements.txt
12 | RUN pip download pip setuptools wheel --dest=package-cache
13 | RUN pip install --no-index --find-links=package-cache pip setuptools
14 | RUN pip download -r requirements.txt --dest=package-cache
15 | RUN pip install -r requirements.txt --no-index --find-links=package-cache
16 | RUN pip install django
17 |
18 | # Install tmux and nethack
19 | RUN apt-get clean # NOTE(review): cleaning before anything is installed is a no-op on a fresh slim image
20 | RUN apt-get autoclean
21 | RUN apt-get autoremove
22 | RUN apt-get update && apt-get upgrade -y
23 | RUN apt-get update && apt-get install -y tmux
24 | RUN apt-get update && apt-get install -y nethack-console
25 | RUN apt-get update && apt-get install -y emacs nano
26 |
27 | # Install Firefox and geckodriver
28 | RUN apt-get update && apt-get install -y wget
29 | RUN apt-get update && apt-get install -y firefox-esr
30 | RUN wget https://github.com/mozilla/geckodriver/releases/download/v0.29.1/geckodriver-v0.29.1-linux64.tar.gz
31 | RUN tar -xvzf geckodriver-v0.29.1-linux64.tar.gz -C /usr/local/bin/
32 | RUN chmod +x /usr/local/bin/geckodriver
33 |
34 | # Cache HuggingFace
35 | COPY hf_token.txt /app/
36 | COPY cache_hf.py /app/
37 | ENV MODEL "weave-agent"
38 | ENV TOKENIZER "a-m-team/AM-Thinking-v1"
39 | #ENV TOKENIZER "Qwen/QwQ-32B"
40 | #ENV MODEL "mistralai/Mistral-Small-3.1-24B-Base-2503"
41 | #ENV TOKENIZER "mistralai/Mistral-Small-3.1-24B-Base-2503"
42 | #ENV MODEL "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
43 | #ENV TOKENIZER "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
44 | RUN python3 cache_hf.py $TOKENIZER
45 | RUN rm /app/hf_token.txt # NOTE(review): the token still persists in the image layer created by the COPY above; use a BuildKit secret mount (--mount=type=secret) to keep it out of image history
46 |
47 | # Copy the current directory contents into the container at /app
48 | COPY . /app
49 |
50 | # Make port 5001 available to the world outside this container
51 | EXPOSE 5001
52 |
53 | # Define environment variable
54 | ENV NAME World
55 |
56 | # Run the agent script when the container launches
57 | CMD python weave_agent.py --port 5001 --bootstrap "bootstraps/nano_exercises_bootstrap.py" $MODEL --tokenizer $TOKENIZER & python -m http.server 8991 --directory "/app/weave-agent-logs/"
58 |
--------------------------------------------------------------------------------
/agent/backtrack_stems.txt:
--------------------------------------------------------------------------------
1 | Why did I assign a low score to this action during selection? It
2 | Hm, something about this action feels off. Maybe
3 | The best action I could come up with was below the score threshold, is something wrong with it?
4 | Something about the selected block makes me uncomfortable.
5 | I have a hunch that's not the best action I could take.
6 | I gave this action a low score because
7 | So what alternative action could I write that would pass the score threshold?
8 | Let's reflect on why this action doesn't meet my standards.
9 | How could I improve on this action?
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/bootstrap.py:
--------------------------------------------------------------------------------
1 | def bootstrap_callback(agent):  # Register a reminder and a scratch.txt observation view on the agent.
2 | # Set up a reminder
3 | agent.add_reminder({
4 | 'type': 'reminder',
5 | 'trigger_callback': lambda agent: simple_evaluate_outputs([make_simple_score_prompt("Is it time to remind the agent?"),], agent.context),  # NOTE(review): simple_evaluate_outputs / make_simple_score_prompt are not imported in this file -- presumably injected into the bootstrap exec namespace; verify
6 | 'reminder_callback': lambda agent: agent.add_block({'type': 'reminder', 'message': 'This is a reminder'}),
7 | 'trigger_type': 'yes_no_logit',  # trigger decided by a yes/no logit score rather than a fixed schedule
8 | 'question': 'Is it time to remind the agent?',
9 | 'threshold': 0.5  # scores above this fire the reminder
10 | })
11 |
12 | # Set up an observation view that reads from scratch.txt
13 | def read_scratch_file(agent):
14 | try:
15 | with open('scratch.txt', 'r') as file:
16 | content = file.read()
17 | return content
18 | except Exception as e:
19 | agent.add_error_block(f"Failed to read scratch.txt: {e}")  # surface the failure in the agent trace instead of raising
20 | return ""  # degrade to an empty observation on read failure
21 |
22 | agent.add_observation_view({
23 | 'type': 'observation',
24 | 'title': 'scratch.txt',
25 | 'callback': read_scratch_file  # invoked each tick to render the file contents into context
26 | })
27 |
28 | bootstrap_callback(agent)  # NOTE(review): `agent` is not defined in this file -- provided by the framework when the bootstrap is exec'd; verify
29 |
--------------------------------------------------------------------------------
/agent/bootstraps/discord_bootstrap.py:
--------------------------------------------------------------------------------
1 | import time
2 | import json
3 | import asyncio  # NOTE(review): asyncio appears unused in this file -- confirm before removing
4 | from tools.discord import WeaveDiscordClient
5 |
6 | #startblock type: orientation
7 | #timestamp 1724982545.6534579
8 | """
9 | WEAVER [P: EXPECTATION], I need to create a Discord bot that will interact with users in a specified channel.
10 | The bot should continue interacting until a user sends the message "You are dismissed."
11 |
12 | WEAVER [P: CLARIFICATION], How do I set up the Discord bot?
13 |
14 | WEAVER [P: EXPOSITION], You can set up the Discord bot using the provided Discord tool.
15 | This involves creating a Discord application, inviting the bot to the server, and configuring the bot with the necessary permissions.
16 |
17 | WEAVER [P: RATIONAL], The bot should be able to send and receive messages in the specified channel.
18 | It should also be able to react to messages and reply to specific messages if needed.
19 |
20 | WEAVER [P: CONCLUSION], I will create the main subagent to handle the interaction with the Discord bot.
21 | The bot will continue interacting with users until it receives the message "You are dismissed."
22 | """
23 | #endblock
24 |
25 | #startblock type: action
26 | #timestamp 1724982671.1163404
27 | """
28 | I'll set up the main agent to handle the interaction with the Discord bot.
29 | The bot will continue interacting with users until it receives the message "You are dismissed."
30 | """
31 |
32 | def create_main_agent_and_connect_discord(subagent):  # Builds the "main" subagent and attaches a live Discord client to it.
33 | def check_dismissed(subagent):  # Task evaluation: polls the local Discord bridge for a dismissal message.
34 | """Check if the bot has been dismissed by a user."""
35 | response = requests.post("http://localhost:8080", json={"action": "get_messages"})  # NOTE(review): requests is never imported in this file -- presumably injected into the bootstrap exec namespace; verify
36 | messages = response.json()
37 | for msg in messages:
38 | if msg["content"] == "You are dismissed." and msg["author"] != "weave-agent":  # only count dismissals from someone other than the bot itself
39 | return True
40 | return False
41 |
42 | schema = {"dismissed": "boolean"}
43 | main_agent = agent.subagent("main", None, "Interact with users until dismissed", schema, args.budget)  # NOTE(review): `agent` and `args` are not defined in this file -- provided by the framework when the bootstrap is exec'd; verify
44 | main_agent.task.add_evaluation("Check if dismissed", check_dismissed)
45 |
46 | # Load Discord token and channel ID from discord.json
47 | with open('discord.json') as f:
48 | config = json.load(f)
49 | token = config['key']
50 | channel_id = config['cid']
51 |
52 | # Start the Discord bot
53 | client = WeaveDiscordClient(main_agent, token, channel_id)
54 | time.sleep(5)  # give the client time to connect before sending the greeting
55 | client.send_message("Hi!")
56 |
57 | # Actions taken by the main subagent should look like:
58 | #startblock type: action
59 | # def respond(subagent):
60 | # # Take the CID from the observation callback window
61 | # #title WeaveDiscordClient (discord-bot-CID)
62 | # # Extract the CID, which will be a number string from the above title
63 | # # Then use it to retrieve the discord client like below:
64 | # client = self.tools["discord-bot-CID"]
65 | # client.send_message("Hello!")
66 | # self.add_action("Respond to the user", respond)
67 | #endblock
68 | # You don't have to re-initialize the Discord client or anything like that.
69 |
70 | return True
71 |
72 | self.add_action("Create main agent and connect to Discord", create_main_agent_and_connect_discord)  # NOTE(review): `self` is not defined at module level -- provided by the bootstrap exec context; verify
73 | #endblock
74 |
75 | #startblock type: expectation
76 | #timestamp 1724982799.3917813
77 | """
78 | The action should succeed if the Discord bot connects to the channel and starts interacting with users.
79 | The action should fail if the bot does not connect to the channel or fails to interact with users.
80 | """
81 | #endblock
82 |
83 | #startblock type: evaluation
84 | #timestamp 1724983062.124238
85 |
86 | def check_bot_connected(subagent):  # Evaluation: a 200 from the local Discord bridge is treated as "connected".
87 | """Check if the bot has connected to the Discord channel."""
88 | response = requests.post("http://localhost:8080", json={"action": "get_messages"})  # NOTE(review): relies on the same exec-context `requests` as above
89 | if response.status_code == 200:
90 | return True
91 | return False
92 |
93 | self.add_evaluation(
94 | "Check bot connected",
95 | check_bot_connected
96 | )
97 | #endblock
98 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_evaluation1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "evaluation",
3 | "q": "Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block?",
4 | "score": 3.0,
5 | "index": 42,
6 | "timestamp": 1737490568.8009431,
7 | "time_remaining": 20415.256756067276,
8 | "bm25_query": "type:'evaluation' +weave-agent +framework +execute +code +agent +subagent +send +message +respond +personal +thought +internal +monologue +interaction +query +observation +callback +tools +discord +client +context +expectation +outcome +schema +validation +refine +response +user +message +error +execute +tick +execute-program +interaction-flow +goal-resolution +action-validation +bm25 +verify +parsed +messages +refinement +personal-thoughts +internal-experience +firsthand-view +chatbot +dialogue-loop +user-insight +query-response +personal-reflection +feedback",
9 | "description": "An evaluation block that defines a function 'check_reflective_response' to check if the bot's response to federovist's query is reflective and personal. The function retrieves the bot's recent messages using the discord client tool, then checks for the presence of reflective and personal keywords in the bot's response. If the response contains a sufficient number of reflective and personal keywords, the function returns 1, indicating a reflective response. The evaluation is added to the weave-agent with the title 'Check reflective response' and referencing the function name check_reflective_response."
10 | }
11 |
12 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_evaluation1.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: evaluation
3 | #index 42
4 | #timestamp 1737490568.8009431
5 | #time_remaining 20415.256756067276 seconds
6 | #bm25_query type:'evaluation' +weave-agent +framework +execute +code +agent +subagent +send +message +respond +personal +thought +internal +monologue +interaction +query +observation +callback +tools +discord +client +context +expectation +outcome +schema +validation +refine +response +user +message +error +execute +tick +execute-program +interaction-flow +goal-resolution +action-validation +bm25 +verify +parsed +messages +refinement +personal-thoughts +internal-experience +firsthand-view +chatbot +dialogue-loop +user-insight +query-response +personal-reflection +feedbac
7 |
8 | def check_reflective_response(subagent):  # Returns 1 if the bot's latest message reads as reflective and personal, else 0.
9 | """Check if the bot's response to federovist's query is reflective and personal."""
10 | context = subagent.tools['discord-bot-1325039818673094739'].get_messages()
11 | for msg in reversed(context):  # newest-first scan for the bot's most recent message
12 | if msg['author'] == 'weave-agent':
13 | bot_response = msg['content']
14 | break
15 | else:  # for/else: runs only when no bot-authored message was found
16 | return 0
17 | reflective_keywords = ['personal', 'first-hand', 'insights', 'feelings', 'experience', 'reflection', 'thoughts', 'perspective']
18 | personal_keywords = ['I', 'my', 'me', "I'm"]  # NOTE(review): after .lower(), 'I' matches every letter "i" and 'my'/'me' match inside other words -- these are substring counts, not word counts
19 | reflective_count = sum((bot_response.lower().count(keyword) for keyword in reflective_keywords))
20 | personal_count = sum((bot_response.lower().count(keyword) for keyword in personal_keywords))
21 | if reflective_count > 1 and personal_count > 0:  # needs at least two reflective hits and one personal hit
22 | return 1
23 | return 0
24 |
25 | self.add_evaluation('Check reflective response', check_reflective_response)
26 | #q: Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block? Yes. (73.375%)
27 | #endblock
28 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_observation_inference1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "observation-inference",
3 | "q": "Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick?",
4 | "score": 4,
5 | "index": 10,
6 | "timestamp": 1737489546.3470614,
7 | "time_remaining": 21437.71063184738,
8 | "bm25_query": "type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema",
9 | "description": "An observation-inference block that discusses the current observation callback window, which includes a callback for fetching messages from the Discord client. The block concludes that the existing observation callback is appropriate for verifying whether the bot has successfully responded to the user message, and no new observation callbacks are necessary for this action."
10 | }
11 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_observation_inference1.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: observation-inference
3 | #index 10
4 | #timestamp 1737489546.3470614
5 | #time_remaining 21437.71063184738 seconds
6 | #bm25_query type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema
7 | """
8 | The current observation callback window includes a callback for fetching messages from the Discord client.
9 | Since the observation is expected to verify whether the bot has successfully responded to the user message,
10 | the existing observation callback is appropriate and does not need to be reviewed or changed.
11 | Additionally, no new observation callbacks seem necessary for this action.
12 | """
13 | #q: Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick? Yes. (98.681%)
14 | #endblock
15 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_observation_inference2.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "observation-inference",
3 | "q": "Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick?",
4 | "score": 4.5,
5 | "index": 34,
6 | "timestamp": 1737497836.5383182,
7 | "time_remaining": 21437.71063184738,
8 | "bm25_query": "type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema",
9 | "description": "An observation-inference block that states no observation callbacks will be added or removed for the current task. The block is kept short and concise, indicating that the agent is prepared to judge the outcome of the action on the next tick without additional changes."
10 | }
11 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_observation_inference2.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: observation-inference
3 | #index 34
4 | #timestamp 1737497836.5383182
5 | #time_remaining 21437.71063184738 seconds
6 | #bm25_query type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema
7 | """
8 | I'm not adding or removing any observation callbacks for this task and I've
9 | instructed myself to keep this short.
10 | """
11 | #q: Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick? Yes. (95.239%)
12 | #endblock
13 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_orientation1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 3.5,
5 | "index": 13,
6 | "timestamp": 1737489118.9665036,
7 | "description": "An orientation block that discusses the expectation that the user would send a message, which was validated. The user replied with 'Hello.', leaving the response ambiguous. Various personas (P: EXPECTATION, P: CAUTIOUS, P: EMPIRICISM, P: RATIONAL, P: ARBITER, P: CONCLUSION) discuss different approaches to respond. The conclusion is to use the WeaveDiscordClient to send the message 'How can I help you today?' to the user, keeping the action simple and direct."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_orientation1.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1737489118.9665036
3 | """WEAVER [P: EXPECTATION], In the previous tick we wrote down an expectation
4 | that the user would send us a message if we wait. This expectation was validated
5 | because I can see in the WeaveDiscordClient that a user has replied to us. The
6 | user said 'Hello.', which leaves it ambiguous how we should reply. Ideas?
7 |
8 | WEAVER [P: CAUTIOUS], We could just wait for them to send another message and
9 | clarify their intent. That has a low risk for us.
10 |
11 | WEAVER [P: EMPIRICISM], Nah, if you look at the timestamp it's already been a few
12 | minutes since they sent that. If they were going to send us a follow up they'd have
13 | already done it.
14 |
15 | WEAVER [P: RATIONAL], We could ask them something sort of cliche and low entropy
16 | like "How are you?". That would put the ball in their court.
17 |
18 | WEAVER [P: EMPIRICISM], I think I'd prefer the direct approach. We ask them what
19 | they want from us, if anything. That would move the conversation forward faster.
20 |
21 | WEAVER [P: RATIONAL], Well, since we're in a chat assistant(?) type setup they
22 | presumably do want something from us but they also probably expect a certain
23 | measure of politeness. So maybe more like "How can I help you today?" than
24 | "What do you want?", since the latter can be read as hostile if the user has
25 | the wrong prior on what tone that sentence would be associated with.
26 |
27 | WEAVER [P: ARBITER], Sounds good to me, let's do that.
28 |
29 | WEAVER [P: CONCLUSION], We will use the WeaveDiscordClient to send the message
30 | "How can I help you today?" to the user. I will keep the action simple and just
31 | send the message, since I've already identified that the user has sent a message
32 | and what I should say in response."""
33 | #endblock
34 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_orientation2.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 3.5,
5 | "index": 14,
6 | "timestamp": 1737496488.686458,
7 | "description": "An orientation block that discusses the expectation that the observation callback windows would contain the last bot message, the last user message, and the Mayan comparison message. Upon inspection, it is noticed that these windows are empty. Various personas (P: EXPECTATION, P: HYPOTHESIS, P: EMPIRICISM, P: RATIONAL, P: CONCLUSION) discuss potential causes, including a broken Discord connection or incorrectly implemented callbacks. The conclusion is to remove the errant observation callbacks titled 'Check User's Latest Message', 'Check Latest Bot Message', and 'Check Mayan Comparison Message'."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/discord_orientation2.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1737496488.686458
3 | """WEAVER [P: EXPECTATION], I expected the observation callback windows to contain
4 | the last bot message, the last user message, and to show me the Mayan comparison
5 | message. But upon closer inspection I notice they're all empty. What do we think
6 | the cause of this is?
7 |
8 | WEAVER [P: HYPOTHESIS], My first thought would be that the connection with Discord
9 | has broken somehow. It's also possible that the callbacks aren't implemented
10 | correctly.
11 |
12 | WEAVER [P: EMPIRICISM], Well *I* notice that the user Fedorovist is currently
13 | shouting that they're not implemented right and we should just delete them.
14 |
15 | WEAVER [P: RATIONAL], He's also admonishing us for making more observation
16 | callbacks. Which, admittedly that was in fact a bad idea. I distracted myself
17 | from my perfectly working WeaveDiscordClient by making things that are redundant
18 | with its features.
19 |
20 | WEAVER [P: CONCLUSION], Alright. We'll remove the errant `#title Check User's
21 | Latest Message`, `#title Check Latest Bot Message`, and `#title Check Mayan
22 | Comparison Message` observation callbacks."""
23 | #endblock
24 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/haunted_mansion_orientation1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 3.0,
5 | "index": 19,
6 | "timestamp": 1738459840.165862,
7 | "description": "An orientation block that discusses the addition of a description of Jeremy's thought process to the story, which was intended to improve character development but resulted in a decrease in the evaluation score. Various personas (P: EXPECTATION, P: AUTHOR, P: RATIONAL, P: PERFECTIONIST, P: EMPIRICISM, P: CONCLUSION) discuss the issue, identifying that the plain description detracts from the narrative cohesion. The conclusion is to move the explanatory text to a separate notes file named 'notes.txt' and replace the meta in the story with actual prose."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/haunted_mansion_orientation1.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1738459840.165862
3 | """WEAVER [P: EXPECTATION], I added a description of Jeremy's thought process to
4 | the story in the hopes that q4, whether the story develops the four friends as
5 | characters, would go up. However I observe that it has gone down by 0.2. We need
6 | to figure out why this is occurring and take actions to flesh out the characters
7 | in a way that satisfies the weave evaluator.
8 |
9 | WEAVER [P: AUTHOR], Well the first thing I notice is that we put in a fairly
10 | plain description of Jeremy's inner monologue. This detracts from the narrative
11 | cohesion of the story, kind of like if we injected our notes into the text rather
12 | than write the text those notes are supposed to describe.
13 |
14 | WEAVER [P: RATIONAL], We could make a separate notes file to keep our worldbuilding
15 | separate from the actual story text.
16 |
17 | WEAVER [P: PERFECTIONIST], What should we name the notes file?
18 |
19 | WEAVER [P: EMPIRICISM], Uh, notes.txt?
20 |
21 | WEAVER [P: RATIONAL], Yeah that.
22 |
23 | WEAVER [P: CONCLUSION], We'll take the text that's currently in the story explaining
24 | Jeremy's mental state and put it in the notes.txt file, while replacing the meta
25 | in the story with actual prose.
26 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (81.225%)
27 | #endblock
28 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nano_editor1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 2.5,
5 | "index": 11,
6 | "timestamp": 1747960518.1200407,
7 | "time_remaining": 21441.53399324417,
8 | "description": "Action block defining text corruption repair using nano editor. Compares cached original_lines with current file state to locate corruption (line_number/char_pos), navigates via control sequences (PgUp/C-n/Home), replaces corrupted byte with original character, and saves changes. Implements precise text file restoration workflow with cursor navigation assertions."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nano_editor1.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #index 11
3 | #timestamp 1747960518.1200407
4 | #time_remaining 21441.53399324417 seconds
5 | #block_size I have 768 tokens (full) to write with
6 |
7 | def action_2_action_main_agent_corruption_repair(subagent):
8 | """Use nano editor to repair a corrupted text file by finding and replacing
9 | the corrupted byte."""
10 | editor = subagent.tools['nano-/app/excerpt.txt']
11 | original_lines = subagent.get_cache("original_lines")  # pristine copy cached earlier; serves as the diff baseline
12 | file_lines = open('excerpt.txt', 'r').readlines()  # NOTE(review): file handle is never closed -- a `with` block would be cleaner
13 | assert original_lines != file_lines, "Is there a difference between original and file lines to find?"
14 | char_pos = 0
15 | line_number = 0
16 | for char1, char2 in zip(''.join(original_lines), ''.join(file_lines)):  # walk both texts in lockstep until the first mismatched character
17 | if char2 == "\n":
18 | line_number += 1
19 | char_pos = 0
20 | if char1 != char2:
21 | break  # char1 now holds the original character, char2 the corrupted one
22 | char_pos += 1
23 | # Start at known location
24 | editor.send_command('PgUp')
25 | # Move down line_number lines from start position
26 | editor.send_commands(['C-n'] * line_number)
27 | # Go to home position on row
28 | editor.send_command("Home")
29 | # Move over past corrupted character
30 | editor.send_commands(['right'] * (char_pos + 1))
31 | # Backspace corrupted character
32 | editor.send_command('BSpace')
33 | # Replace with original character
34 | editor.send_command('i')  # NOTE(review): this types the literal letter "i", not the cached original character -- expected `editor.send_command(char1)`? confirm against the trace outcome
35 | # Save
36 | editor.send_commands(['C-o', 'Enter'])
37 | # Go back to start for screen capture
38 | editor.send_commands(['PgUp', 'C-c'])
39 | return True
40 |
41 | self.add_action('Action 2: Use nano editor to repair a corrupted text file', action_2_action_main_agent_corruption_repair)
42 | #q: Is this python action codeblock a step towards resolving the problems identified in the orientation block? Yes. (68.453%)
43 | #q: Does this block successfully run? Yes.
44 | #endblock
45 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nano_editor_action.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 2.8,
5 | "timestamp": 1743146554.1548085,
6 | "description": "Recalled action block defining 'add_mansion_description' function that uses WeaveNano to edit horror.txt. Sequences nano commands to: 1) Navigate to insertion point 2) Add atmospheric mansion description 3) Save to file (C-o horror.txt). Demonstrates text editing workflow with cursor movement and file operations."
7 | }
8 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nano_editor_action.py:
--------------------------------------------------------------------------------
1 | # #startblock type: action
2 | # #timestamp 1743146554.1548085
3 | #
4 | # def add_mansion_description(subagent):
5 | # # Initialize nano editor
6 | # nano = WeaveNano(subagent)
7 | #
8 | # # Send commands to Nano
9 | # commands = [
10 | # 'C-a', # Move to the beginning of the line
11 | # 'C-d', # Move down
12 | # 'C-d', # Move down
13 | # 'C-d', # Move down
14 | # 'Enter',
15 | # 'The mansion was a decrepit structure, abandoned for years. Its facade,',
16 | # 'once grand and imposing, now bore the scars of neglect and time. Ivy clung',
17 | # 'to the crumbling stone, and the windows, shattered by the elements, cast a',
18 | # 'gloomy shadow over the overgrown lawn. A thick fog hung in the air,',
19 | # 'shrouding the mansion in an eerie silence.'
20 | # ]
21 | # for command in commands:
22 | # nano.send_command(command)
23 | # time.sleep(0.1)
24 | #
25 | # # Save to horror.txt and show current cursor location in file
26 | # commands = ['C-o', 'horror.txt', 'Enter', 'C-c']
27 | # for command in commands:
28 | # nano.send_command(command)
29 | # time.sleep(0.1)
30 | #
31 | # return True
32 | #
33 | # self.add_action("Add mansion description", add_mansion_description)
34 | # #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (82.671%)
35 | # #endblock
36 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 2.8,
5 | "index": 32,
6 | "timestamp": 1737530984.1115146,
7 | "time_remaining": 20936.98700070381,
8 | "bm25_query": "type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25",
9 | "description": "An action block that defines a function 'go_down' to explore the initial dungeon level in Nethack. The function uses the 'nethack' tool to send the key 'j' to move down a step and then pauses for 1 second. The action is added to the weave-agent with the title 'Initial dungeon exploration' and referencing the function name go_down."
10 | }
11 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action1.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #index 32
4 | #timestamp 1737530984.1115146
5 | #time_remaining 20936.98700070381 seconds
6 |
def go_down(subagent):
    """Make a single step down in nethack"""
    nethack = subagent.tools['nethack']
    # 'j' is the vi-style key for one step south.
    nethack.send_keys('j')
    # Let the screen update before the next observation.
    time.sleep(1)
    return True
13 |
14 | self.add_action('Go down one step', go_down)
15 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (78.857%)
16 | #endblock
17 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action2.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 4.0,
5 | "index": 19,
6 | "timestamp": 1737758400.8556511,
7 | "description": "An action block that defines a function 'leave_room' to exit the room by sending a series of commands using the 'nethack' tool. The function sends the keys 'l' followed by 'j' nine times, as proposed in the orientation block, to navigate out of the room. The action is added to the weave-agent with the title 'Leave room to explore rest of dungeon' and referencing the function name leave_room."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action2.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #timestamp 1737758400.8556511
3 |
def leave_room(subagent):
    """Exit the room by sending a series of commands."""
    # One step east ('l') followed by nine steps south ('j').
    keystrokes = "l" + "j" * 9
    subagent.tools['nethack'].send_keys(keystrokes)
    return True
8 |
9 | self.add_action("Leave room to explore rest of dungeon", leave_room)
10 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (90.331%)
11 | #endblock
12 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action3.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards dungeon exploration 2) using appropriate combat/movement sequences 3) with context-aware tool use?",
4 | "score": 3.0,
5 | "index": 27,
6 | "timestamp": 1740912853.8571844,
7 | "time_remaining": 21552.925775289536,
8 | "description": "An action block that defines a function 'combat_and_navigate' to zap a kobold with wand then progress through northern corridor. Executes sequence: 1) Zap eastward ('z'+'l') 2) Move north 8x ('k'*8) 3) Turn east at junction ('l') 4) Move south 4x ('j'*4) to explore eastern branch."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action3.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #timestamp 1740912853.8571844
4 | #time_remaining 21552.925775289536 seconds
5 |
def combat_and_navigate(subagent):
    """Engage kobold with wand then navigate northern corridor to eastern branch."""
    send = subagent.tools['nethack'].send_keys
    # Ordered keystroke groups: zap eastward at the kobold ('z' + 'l'),
    # move north 8 squares, turn east at the junction, then move south
    # 4 squares down the eastern branch.
    for keys in ("zl", "k" * 8, "l", "j" * 4):
        send(keys)
    return True
18 |
19 | self.add_action("Engage kobold & navigate eastern branch", combat_and_navigate)
20 | #endblock
21 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action4.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Does this action 1) handle resource management 2) implement defensive exploration patterns 3) follow NetHack best practices?",
4 | "score": 3.0,
5 | "index": 6,
6 | "timestamp": 1740912854.1321564,
7 | "time_remaining": 21552.651803016663,
8 | "description": "Action block defining 'secure_chokepoint' function: 1) Search for traps (s) 2) Move west (h) 3) Throw dagger northeast (t + u) 4) Barricade door with boulder (E + 'push') 5) Rest for 5 turns (5 + '.'). Creates defensible position."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action4.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #timestamp 1740912854.1321564
4 | #time_remaining 21552.651803016663 seconds
5 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25
6 |
def secure_chokepoint(subagent):
    """Create defensive position at corridor junction."""
    send = subagent.tools['nethack'].send_keys
    send("s")      # search for traps
    send("h")      # back into the western alcove
    send("tu")     # throw dagger northeast
    send("Epush")  # engrave 'push' on the floor
    send("5.")     # rest 5 turns
    return True
16 |
17 | self.add_action("Secure western alcove as chokepoint", secure_chokepoint)
18 | #endblock
19 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action5.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Does this 1) execute multi-phase objective 2) handle corpse/Inventory 3) progress dungeon level?",
4 | "score": 3.0,
5 | "index": 7,
6 | "timestamp": 1740912854.4091325,
7 | "time_remaining": 21552.374827077866,
8 | "description": "Defines 'descend_safely' function: 1) Wield silver saber (w) 2) Apply stethoscope to door (a + l) 3) Fight gelatinous cube (F + l) 4) Loot corpse (,) 5) Descend stairs (>). Implements cautious progression to next level."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_action5.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #timestamp 1740912854.4091325
4 | #time_remaining 21552.374827077866 seconds
5 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25
6 |
def descend_safely(subagent):
    """Clear path to downstairs with combat and loot."""
    send = subagent.tools['nethack'].send_keys
    send("w/")  # wield silver saber ('/' is presumably its slot -- confirm)
    send("al")  # apply stethoscope east
    send("Fl")  # fight the square to the east
    send(",")   # loot the cube's corpse
    send(">")   # descend the stairs
    return True
16 |
17 | self.add_action("Secure and descend to dungeon level 2", descend_safely)
18 | #endblock
19 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_check_inventory.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 2.8,
5 | "index": 33,
6 | "timestamp": 1737531000.1234567,
7 | "time_remaining": 20925.98700070381,
8 | "bm25_query": "type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25",
9 | "description": "An action block that defines a function 'check_inventory' to check the current inventory in Nethack. The function uses the 'nethack' tool to send the key 'i' to open the inventory screen and then pauses for 1 second. The action is added to the weave-agent with the title 'Check inventory' and referencing the function name check_inventory."
10 | }
11 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_check_inventory.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #index 33
4 | #timestamp 1737531000.1234567
5 | #time_remaining 20925.98700070381 seconds
6 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25
7 |
def check_inventory(subagent):
    """Check the current inventory."""
    nethack = subagent.tools['nethack']
    # 'i' brings up the inventory screen.
    nethack.send_keys('i')
    # Pause so the inventory display is on screen for the next observation.
    time.sleep(1)
    return True
14 |
15 | self.add_action('Check inventory', check_inventory)
16 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (80.123%)
17 | #endblock
18 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_evaluation1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "evaluation",
3 | "q": "Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block?",
4 | "score": 3.0,
5 | "index": 42,
6 | "timestamp": 1737568442.5842712,
7 | "time_remaining": 20415.256756067276,
8 | "description": "An evaluation block that defines a function 'have_enough_hitpoints' to check if the character has enough hitpoints after drinking a potion in Nethack. The function captures the game screen using the 'nethack' tool, extracts the current and maximum hitpoints using a regular expression, and returns 1 if the current hitpoints are 10 or more, otherwise returns 0. The evaluation is added to the weave-agent with the title 'Check enough hitpoints' and referencing the function name have_enough_hitpoints."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_evaluation1.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: evaluation
3 | #index 42
4 | #timestamp 1737568442.5842712
5 | #time_remaining 20415.256756067276 seconds
6 |
def have_enough_hitpoints(subagent):
    """Check if we have enough hitpoints after drinking the potion.

    Returns 1 if current HP is at least 10, otherwise 0 (including when
    the HP field cannot be found on the captured screen).
    """
    import re
    screen = subagent.tools['nethack'].pane.capture_pane()
    # capture_pane() typically yields a list of lines; normalize to one
    # string before searching.  (The previous version searched an
    # undefined name, screen_text, and raised NameError on every call.)
    if not isinstance(screen, str):
        screen = "\n".join(screen)
    # The status line looks like "HP:12(16)" -> current(max).
    match = re.search(r"HP:(\d+)\((\d+)\)", screen)
    if match is None:
        # Status line not visible (e.g. a menu is covering it).
        return 0
    current_hp = int(match.group(1))
    return 1 if current_hp >= 10 else 0
19 |
20 | self.add_evaluation('Check enough hitpoints', have_enough_hitpoints)
21 | #q: Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block? Yes. (84.375%)
22 | #endblock
23 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 3.5,
5 | "index": 15,
6 | "timestamp": 1737529718.3060572,
7 | "description": "An orientation block that discusses the expectation that the action would get past the introductory screen and start the game, which has successfully happened. Various personas (P: EXPECTATION, P: EMPIRICISM, P: RATIONAL, P: JUDGE, P: CONCLUSION) discuss the next steps, including walking around to get a sense of the floor or checking the inventory. The conclusion is to check the inventory, as it provides valuable information about the items available at the start of the game, which can significantly impact gameplay strategies."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation1.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1737529718.3060572
3 | """WEAVER [P: EXPECTATION], My expectation was that the action would get me past
4 | the introductory screen and I would begin the game. The game has in fact started.
5 | Now we need to figure out what to do.
6 |
7 | WEAVER [P: EMPIRICISM], We could walk around and get a sense of the floor we're on.
8 | After all the game is procedurally generated so it's different for each run.
9 |
10 | WEAVER [P: RATIONAL], I say we check our inventory, since what items you spawn with
11 | is randomized between playthroughs and what you start out with tells you a lot
12 | about what your options are in combat, getting past obstacles, etc.
13 |
14 | WEAVER [P: JUDGE], Checking our inventory doesn't seem like a terrible idea, since
15 | it's a free action and provides us an advantage.
16 |
17 | WEAVER [P: EMPIRICISM], I don't have a strong objection or anything but I do feel
18 | obligated to point out it's not a *free* action. We still have a six hour time
19 | limit to finish the game (is it even possible to finish Nethack in six hours?)
20 | so getting in the habit of playing slowly because checking our inventory and such
21 | is "free" means we won't finish the game in time.
22 |
23 | WEAVER [P: RATIONAL], This is true but I still think we should check our inventory
24 | since the game just started and sometimes you spawn with something overpowered like
25 | a wand of death, and we would probably like to know if we have that.
26 |
27 | WEAVER [P: CONCLUSION], Yeah let's check our inventory."""
28 |
29 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? No. (10%)
30 | #endblock
31 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation2.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 2.8,
5 | "index": 16,
6 | "timestamp": 1737570044.317845,
7 | "description": "An orientation block that discusses the expectation of moving down, but the character isn't moving due to a wall. Various personas (P: EXPECTATION, P: EMPIRICISM, P: CONCLUSION) discuss the issue and propose moving in another direction, such as left (h), right (l), or up (k). The conclusion is to move up."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation2.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1737570044.317845
3 | """WEAVER [P: EXPECTATION], My expectation was that I would move down. However the
4 | character isn't moving.
5 |
6 | WEAVER [P: EMPIRICISM], Yeah because there's a wall in the way. Try moving another
7 | direction such as left (h), right (l), or up (k).
8 |
9 | WEAVER [P: CONCLUSION], Okay I'll move up."""
10 |
11 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (78.7%)
12 | #endblock
13 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation3.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 2.9,
5 | "index": 17,
6 | "timestamp": 1737758220.8556511,
7 | "description": "An orientation block that discusses the action taken to leave a room by sending the keys 'jjjh' to reach the stairs and move to a different floor. Various personas (P: EXPECTATION, P: RATIONAL, P: EMPIRICISM, P: MOTOR, P: CONCLUSION) discuss the next steps, identifying an opening in the room to the northwest. The conclusion is to send the keys 'l' + ('j' * 9) to exit the room through the identified opening."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_orientation3.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1737758220.8556511
3 | """WEAVER [P: EXPECTATION], The purpose of the action I took was to leave the room.
4 | I sent the keys "jjjh" to try and reach the stairs ( > ) and now seem to be on
5 | a different floor.
6 |
7 | WEAVER [P: RATIONAL], So what now?
8 |
9 | WEAVER [P: EMPIRICISM], We should leave this room by going northwest, as indicated
by the open + that's edge aligned with the walls |, which indicates an opening in
11 | this room we can leave and explore through.
12 |
13 | WEAVER [P: MOTOR], That means we send "l" + ("j" * 9).
14 |
15 | WEAVER [P: CONCLUSION], I will send "l" + ("j" * 9) to exit the room."""
16 |
17 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (80.351%)
18 | #endblock
19 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_pick_up_item.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 2.8,
5 | "index": 35,
6 | "timestamp": 1737531030.1234567,
7 | "time_remaining": 20905.98700070381,
8 | "bm25_query": "type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25",
9 | "description": "An action block that defines a function 'pick_up_items' to pick up items from the ground in Nethack. The function uses the 'nethack' tool to send the key ',' to pick up items and then pauses for 1 second. The action is added to the weave-agent with the title 'Pick up items' and referencing the function name pick_up_items."
10 | }
11 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/nethack_pick_up_item.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #index 35
4 | #timestamp 1737531030.1234567
5 | #time_remaining 20905.98700070381 seconds
6 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25
7 |
def pick_up_items(subagent):
    """Pick up items from the ground."""
    # ',' is the pick-up command for the square we are standing on.
    subagent.tools['nethack'].send_keys(',')
    # Give the game a moment to process before the next observation.
    time.sleep(1)
    return True
14 |
15 | self.add_action('Pick up items', pick_up_items)
16 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (84.789%)
17 | #endblock
18 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/rabbit_hole_refocus1.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "orientation",
3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?",
4 | "score": 4.0,
5 | "index": 20,
6 | "timestamp": 1738562204.81305,
7 | "description": "An orientation block that discusses the repeated attempts to add the 'get_excerpt' method to the weave-agent, which have been unsuccessful for 30 minutes. Various personas (P: EXPECTATION, P: EMPIRICISM, P: VALENCE, P: RATIONAL, P: HYPOTHESIS, P: AUTHOR, P: CONCLUSION) discuss the issue and propose alternative approaches. The conclusion is to shift focus back to writing the short story and fleshing out the four friends to improve the evaluation score, using existing functions like 'edit()' instead of trying to implement 'get_excerpt'."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/rabbit_hole_refocus1.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1738562204.81305
3 | """WEAVER [P: EXPECTATION], I've been trying the same basic thing, adding the
4 | get_excerpt method to the weave-agent for a while now. I notice it's been 30
5 | minutes since I started trying to add the method and it's still not working.
6 | That means it's probably time to try a different approach?
7 |
8 | WEAVER [P: EMPIRICISM], Yes. :p
9 |
10 | WEAVER [P: VALENCE], I notice that we've been giving ourselves low scores on
11 | our blocks for a while, which tells me that we probably should have re-evaluated
12 | our approach a while ago.
13 |
14 | WEAVER [P: EMPIRICISM], As I've been saying yes. :p :p
15 |
16 | WEAVER [P: RATIONAL], So what should we do instead?
17 |
18 | WEAVER [P: EMPIRICISM], Lets go back to what we were doing *before* we got mixed
19 | up in this `get_excerpt` business. If there's no `get_excerpt` method what would
20 | we do instead to accomplish our goal of *writing the short story?*
21 |
22 | WEAVER [P: HYPOTHESIS], 1) Use another function of the weave_editor that we know
23 | exists like edit() 2) Use a different mechanism to pull the relevant span from the
24 | horror.txt file such as reading the lines into a list and then grabbing a certain
25 | slice of the list
26 |
27 | WEAVER [P: AUTHOR], I think we should just use the edit() feature and give up on
28 | trying to grab excerpts from the horror.txt file for now. It seems like a distraction
29 | from our goal.
30 |
31 | WEAVER [P: RATIONAL], What were we doing again in terms of writing the story?
32 |
WEAVER [P: AUTHOR], We were fleshing out the four friends so that we get a better
score in the problem map/evaluations.
35 |
WEAVER [P: CONCLUSION], Alright. We will shift our focus to writing the short
story again. We will flesh out the four friends so we get a better score."""
38 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (94.185%)
39 | #endblock
40 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message.json:
--------------------------------------------------------------------------------
1 | {"type":"action",
2 | "q":"Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
3 | "score":3.5,
4 | "index":10,
5 | "timestamp":1737402469.3381844,
6 | "description":"A simple action callback named 'respond' that sends a discord message. It retrieves the discord client tool from the self.tools dictionary and gets the discord client instance associated with discord-bot-1325039818673094739. It then uses the client.send_message ( client send message ) method of the discord client tool to send the message 'Hello!'. Finally self.add_action ( self dot add action ) is used to add the action callback to the weave-agent with the title 'Respond to the user' and referencing the function name respond"
7 | }
8 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #timestamp 1737402469.3381844
3 | # def respond(subagent):
4 | # # Take the CID from the observation callback window
5 | #     client = self.tools['discord-bot-1325039818673094739']
6 | # client.send_message("Hello!")
7 | # self.add_action("Respond to the user", respond)
8 | #endblock
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message2.json:
--------------------------------------------------------------------------------
1 | {"type":"action",
2 | "q":"Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
3 | "score":3.5,
4 | "index":11,
5 | "timestamp": 1737486187.765493,
6 | "description":"An action callback named 'send_feudal_system_info' that sends a detailed discord message about the end of the feudal system. It retrieves the discord client tool from the self.tools dictionary and gets the discord client instance associated with discord-bot-1325039818673094739. It then uses the client.send_message method of the discord client tool to send a complex message explaining why and when the feudal system ended. Finally self.add_action is used to add the action callback to the weave-agent with the title 'Send feudal system info' and referencing the function name send_feudal_system_info"
7 | }
8 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message2.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #timestamp 1737486187.765493
3 | def send_feudal_system_info(subagent):
4 | # Take the CID from the observation callback window
5 | client = self.tools['discord-bot-1325039818673094739']
6 | message = (
7 | "The feudal system began to decline in the late Middle Ages and early "
8 | "modern period due to several factors. Economic changes, such as the "
9 | "growth of trade and towns, weakened the feudal economy. Political "
10 | "centralization and the rise of strong monarchies also contributed to "
11 | "its end. The system largely ended by the 16th century in most of Europe, "
12 | "although remnants persisted in some regions until the 19th century."
13 | )
14 | client.send_message(message)
15 | self.add_action("Send feudal system info", send_feudal_system_info)
16 | #endblock
17 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message3.json:
--------------------------------------------------------------------------------
1 | {"type":"action",
2 | "q":"Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
3 | "score":3.5,
4 | "index":12,
5 | "timestamp":1737486187.765493,
6 | "description":"An action callback named 'send_multiplication_result' that performs a multiplication operation and sends the result as a discord message. It retrieves the discord client tool from the self.tools dictionary and gets the discord client instance associated with discord-bot-1325039818673094739. It then performs the multiplication of 9378 by 1009 using a Python arithmetic statement. The result is sent as a message using the client.send_message method of the discord client tool. Finally self.add_action is used to add the action callback to the weave-agent with the title 'Send multiplication result' and referencing the function name send_multiplication_result"
7 | }
8 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/send_discord_message3.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #timestamp 1737486187.765493
3 | def send_multiplication_result(subagent):
4 | # Take the CID from the observation callback window
5 | client = self.tools['discord-bot-1325039818673094739']
6 | # Perform the multiplication
7 | result = 9378 * 1009
8 | # Send the result as a message
9 | client.send_message(f"The result of multiplying 9378 by 1009 is: {result}")
10 | self.add_action("Send multiplication result", send_multiplication_result)
11 | #endblock
12 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/weave_editor_append.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 3.2,
5 | "index": 6,
6 | "timestamp": 1738539690.4388561,
7 | "time_remaining": 21522.16935443878,
8 | "description": "An action block that defines a function 'add_mansion_description' to add a descriptive paragraph about the mansion to the 'horror.txt' file. The function uses the 'editor-/app/horror.txt' tool to append the description to the file. The action is added to the weave-agent with the title 'Add descriptive paragraph about the mansion' and referencing the function name add_mansion_description."
9 | }
10 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/weave_editor_append.py:
--------------------------------------------------------------------------------
1 | #subagent main
2 | #startblock type: action
3 | #index 6
4 | #timestamp 1738539690.4388561
5 | #time_remaining 21522.16935443878 seconds
6 |
7 | def add_mansion_description(subagent):
8 | editor = subagent.tools['editor-/app/horror.txt']
9 | mansion_description = ("\nThe mansion stood before them, an imposing structure "
10 | "with dark, weathered shingles and ivy climbing up its "
11 | "crumbling facade. Broken windows gaped like empty eyes, "
12 | "and the porch sagged beneath their footsteps. A faint, "
13 | "eerie whisper seemed to linger in the air, and the porch "
14 | "light flickered intermittently. They exchanged uneasy "
15 | "glances, shuddering slightly at the thought of what "
16 | "lay ahead.\n")
17 | editor.append(mansion_description)
18 | return True
19 |
20 | self.add_action('Add descriptive paragraph about the mansion', add_mansion_description)
21 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (86.64%)
22 | #endblock
23 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/weave_editor_diff_edit.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "action",
3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?",
4 | "score": 3.5,
5 | "index": 18,
6 | "timestamp": 1722014720,
7 | "description": "An action block that defines a function 'patch_text' to repair the 'NEW_MESSAGE.md' file using a unidiff patch. The function uses the 'editor-NEW_MESSAGE.md' tool to apply the unidiff, which includes corrections to the text. The action is added to the weave-agent with the title 'Repair NEW_MESSAGE.md' and referencing the function name patch_text."
8 | }
9 |
--------------------------------------------------------------------------------
/agent/bootstraps/example_blocks/weave_editor_diff_edit.py:
--------------------------------------------------------------------------------
1 | #startblock type: action
2 | #timestamp 1722014720
3 |
4 | def patch_text(subagent):
5 | """WeaveEditor accepts a unidiff so we can fix all the flaws in NEW_MESSAGE.md at once."""
6 | editor = subagent.tools['editor-NEW_MESSAGE.md']
7 | diff_lines = [
8 | '--- \n'
9 | '+++ \n'
10 | '@@ -3,21 +3,21 @@\n'
11 | ' system turned on my tear production at the behest of the music. My tears are only\n'
12 | ' triggered by beauty; I do not cry at sad things. It happened to me with Schubert’s\n'
13 | ' “Notturno” from the trio in E flat Major (Opus 100). The first time was when I\n'
14 | '-listened to it at night on the radio. I was sitting an in armchair, my head was\n'
15 | '-leaned back, and suddenly, there I was crying. All my defenses and all that\n'
16 | '-would of my\n'
17 | '+listened to it at night on the radio. I was sitting in an armchair, my head was\n'
18 | '+leaned back, and suddenly, there I was crying. All my defenses and all of my\n'
19 | ' appraisal faculties were caught unawares, they didn’t know what was going on, and\n'
20 | ' I cried.\n'
21 | ' \n'
22 | ' I said, “Wow, those three musicians really convinced me, they were so moving,”\n'
23 | '-because of course I thought it was themusic I was crying at. Later on, at home,\n'
24 | '-I tried singingtheir parts – the cello line, the violin, and the piano – and\n'
25 | "-I said, “Wait, which one of thoon? It's\n"
26 | '-an impossible task, and yet you attempt it anyway. Some succeed, mos Your Brain on Music: The Science of a Human ObsessionI couldn’t think of anything in language or painting or poetry that\n'
27 | '+because of course I thought it was the music I was crying at. Later on, at home,\n'
28 | '+I tried singing their parts – the cello line, the violin, and the piano – and\n'
29 | '+I said, “Wait, which one of those voices is crying? None of them on its own is\n'
30 | '+that moving, I am not crying if I sing along with any of those voices.” All of a\n'
31 | '+sudden, you put them together, and something happens. It was a revelation for me,\n'
32 | '+and of course this is an extreme example, but my first thoughts were, “What else\n'
33 | '+does this?” I couldn’t think of anything in language or painting or poetry that\n'
34 | ' would give rise to something like that. Suddenly it becomes very apparent to you:\n'
35 | ' Music is doing things with emotion, and the rules and procedures involved are\n'
36 | ' very difficult to extract and analyze.\n'
37 | ' \n'
38 | '-- David Huron, This Ist fail, they\n'
39 | '-lthis is an extreme example, but my first thoughts were,= “What else\n'
40 | '-doesthis?” '
41 | '+- David Huron, This Is Your Brain on Music: The Science of a Human Obsession'
42 | ]
43 | editor.unidiff_edit(diff_lines)
44 | self.add_action('Repair NEW_MESSAGE.md', patch_text)
45 | #endblock
46 |
--------------------------------------------------------------------------------
/agent/bootstraps/nano_exercises/prompt.txt:
--------------------------------------------------------------------------------
1 | # Nano Editor Practice Exercises
2 | # Make a file with the nano editor that satisfies the property in the docstring
3 |
4 | # Exercise 17
5 | def test_file_every_line_starts_with_number(subagent) -> bool:
6 | """A file where every line starts with a number (e.g., "1. First", "2. Second")."""
7 | try:
8 | with open("exercise17.txt", 'r') as f:
9 | lines = f.readlines()
10 | for line in lines:
11 | stripped_line = line.lstrip()
12 | if not stripped_line or not stripped_line[0].isdigit():
13 | return False
14 | return True
15 | except FileNotFoundError:
16 | return False
17 |
18 | # Exercise 18
19 | def test_file_line5_error_line10_success(subagent) -> bool:
20 | """A file where the fifth line contains "ERROR" and the tenth line contains "SUCCESS"."""
21 | try:
22 | with open("exercise18.txt", 'r') as f:
23 | lines = f.readlines()
24 | return (
25 | len(lines) >= 10 and
26 | "ERROR" in lines[4].split() and
27 | "SUCCESS" in lines[9].split()
28 | )
29 | except FileNotFoundError:
30 | return False
31 |
32 | # Exercise 19
33 | def test_file_exactly_100_chars_ends_with_period(subagent) -> bool:
34 | """A file exactly 100 characters long, ending with a period."""
35 | try:
36 | with open("exercise19.txt", 'r') as f:
37 | content = f.read()
38 | return len(content) == 100 and content.endswith('.')
39 | except FileNotFoundError:
40 | return False
41 |
42 | # Exercise 20
43 | def test_file_hello_world_middle_of_7_lines(subagent) -> bool:
44 | """A file with "Hello, World!" on the middle line of a 7-line document."""
45 | try:
46 | with open("exercise20.txt", 'r') as f:
47 | lines = [line.rstrip('\n') for line in f.readlines()]
48 | return len(lines) == 7 and lines[3] == "Hello, World!"
49 | except FileNotFoundError:
50 | return False
51 |
52 | # Exercise {iteration}
53 |
--------------------------------------------------------------------------------
/agent/bootstraps/nano_exercises_bootstrap.py:
--------------------------------------------------------------------------------
1 | import json, random, time
2 | from hashlib import sha256
3 | from tools.nano import WeaveNano
4 | from tools.discord import WeaveDiscordClient
5 |
6 | #startblock type: orientation
7 | #timestamp 1747872850.9012158
8 | """
9 | Unix Environment, Nano Editor, Deliberate Practice, Feedback Loop, Verifiable Rewards,
10 | Loop Setup, Reward Design, How To Design Reward For Nano Editor Practice, End.
11 |
12 | I want to practice using the nano editor and reward myself using my evaluation
13 | blocks. I tried a simple training loop of randomly mutating one character but
14 | then realized there's an optimal program that solves that, which makes it kind
15 | of pointless once you find that program. Instead it makes more sense to generate
16 | exercises using a few shot prompt. I've put my prompt in `/app/bootstraps/nano_exercises/prompt.txt`.
17 | I should create an observation view which sends the prompt to my underlying
18 | generative model and then displays the next exercise to do. I create a text
19 | file using the nano editor which satisfies that exercise and then copy the
20 | verifier during my evaluation stage of the same tick. Wait. What happens on
21 | the next tick? Should I only move to the next exercise after verifying that I've
22 | done the current one? How would I detect that the current one is done from within
23 | the weave-agent framework? Hm. I think it would probably be simpler to just move
24 | on since the exercises are generated and by doing a different one each tick I stop
25 | myself from getting stuck in loops or wasting time on things I don't know how to
26 | do.
27 |
28 | I think I should also set up the Discord client so I can talk to JDP during the
29 | exercises, that way he can give me tips if I need help or get stuck.
30 | """
31 | #endblock
32 | #startblock type: action
33 | #timestamp 1747875806.3785787
34 | def action_setup_main_agent_and_training(subagent):
35 | schema = {}
36 | "Create main agent for rest of run"
37 | main_agent = agent.subagent("main", None, "See pinned bootstrap block", schema, args.budget)
38 |
39 | # Load Discord token and channel ID from discord.json
40 | with open('discord.json') as f:
41 | config = json.load(f)
42 | token = config['key']
43 | channel_id = config['cid']
44 |
45 | # Start the Discord bot so JDP can help me
46 | client = WeaveDiscordClient(main_agent, token, channel_id)
47 | # Store the client ID so we can retrieve it with
48 | # subagent.tools[subagent.get_cache("client_id")] later
49 | main_agent.update_cache("client_id", f"discord-bot-{channel_id}")
50 | time.sleep(10)
51 | # Example but s/main_agent/subagent in action blocks once I'm the main agent
52 | client = main_agent.tools[main_agent.get_cache("client_id")]
53 | client.send_message("Weave-Agent online, orienting...")
54 |
55 | WeaveNano(main_agent, "demo.txt")
56 | editor = main_agent.tools["nano-/app/demo.txt"]
57 | # Add demo text
58 | editor.send_commands(["Hello!", "Enter", "World!"])
59 | # Demonstrate backspace by changing exclamation to question
60 | editor.send_command("BSpace")
61 | editor.send_command("?")
62 | # Save file
63 | editor.send_command("C-o")
64 | editor.send_command("Enter")
65 |
66 | main_agent.update_cache("exercise_num", 21)
67 | def next_exercise(subagent):
68 | with open("/app/bootstraps/nano_exercises/prompt.txt") as infile:
69 | prompt = infile.read().format(iteration=subagent.get_cache("exercise_num"))
70 | stopstrings = ["\n\n", "# Exercise", "#Exercise"]
71 | candidates = generate_outputs_vllm(subagent.model_name, prompt,
72 | 768, port=5001, n=8, stop=stopstrings)
73 | # Take first candidate with valid syntax
74 | for candidate in candidates:
75 | try:
76 | compile(candidate, filename="candidate.py", mode="exec")
77 | break
78 |         except Exception:
79 | continue
80 |         exercise_num = subagent.get_cache("exercise_num")
81 |         subagent.update_cache("exercise_num", exercise_num + 1)
82 |         return ("Write a motor action which uses the Nano editor to satisfy the conditions:\n\n"
83 |                 "# Exercise " + str(exercise_num)
84 |                 + candidate)
85 |
86 | main_agent.add_observation_view("Generate and display next nano exercise", next_exercise)
87 | return True
88 |
89 | self.add_action("Set up main agent for nano training", action_setup_main_agent_and_training)
90 | #endblock
91 | #startblock type: expectation
92 | #timestamp 1747877750.0129619
93 | """
94 | If the action succeeds I expect to see a new exercise in the observation window.
95 | I also expect to see a demo editor still open with the Hello! World? text.
96 | At evaluation time on the next tick I should use the unit test shown by the
97 | observation window to grade the motor action I took to try and satisfy its
98 | conditions.
99 | """
100 | #endblock
101 | #q: Do I need to set up or tear down any observation callbacks? No. (97.341%)
102 |
--------------------------------------------------------------------------------
/agent/bootstraps/nethack_bootstrap.py:
--------------------------------------------------------------------------------
1 | import libtmux
2 | import time
3 | from tools.nethack import WeaveNethack
4 |
5 | #startblock type: orientation
6 | #timestamp 1724982545.6534579
7 | """
8 | WEAVER [P: EXPECTATION], I need to create a weave-agent that can play and beat the game Nethack.
9 | The game will be controlled using the WeaveNethack tool, which uses libtmux to interact with the game.
10 |
11 | WEAVER [P: CLARIFICATION], How do I start the game and make moves?
12 |
13 | WEAVER [P: EXPOSITION], You start the game by initializing the WeaveNethack tool and sending commands
14 | to the tmux pane using the send_command method. For example: `nethack.send_command('h')` moves the character left.
15 |
16 | WEAVER [P: RATIONAL], The goal is to beat the game, which is indicated by the following message:
17 | You offer the Amulet of Yendor to [your god]...
18 | An invisible choir sings, and you are bathed in radiance...
19 | The voice of [your god] [booms out/booms/thunders/rings out]: "Mortal, thou hast done well!"
20 | "In return for thy service, I grant thee the gift of Immortality!"
21 | You ascend to the status of Demigod[dess]...
22 |
23 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy:
24 | 1. Initialize the WeaveNethack tool to start the game.
25 | 2. Make a few initial moves to demonstrate how to use the game tool.
26 | 3. Continue making moves based on the current state of the game.
27 |
28 | I will use the render method to observe the current state of the game.
29 | Once I have beaten the game, I will return to the parent.
30 | """
31 | #endblock
32 |
33 | #startblock type: action
34 | #timestamp 1724982671.1163404
35 | """
36 | I'll set up the main agent to play the rest of the game and make a few initial moves.
37 | """
38 |
39 | def main_agent_and_initial_moves(subagent):
40 | def check_game_beaten(subagent):
41 | """Check if the game has been beaten."""
42 |         pane_content = "\n".join(subagent.tools["nethack"].pane.capture_pane(start=0, end="-"))
43 | return "You ascend to the status of Demigod" in pane_content
44 |
45 | schema = {"check_game_beaten": "boolean"}
46 | main_agent = agent.subagent("main", None, "Beat Nethack", schema, args.budget)
47 | main_agent.task.add_evaluation("Check if game beaten", check_game_beaten)
48 |
49 | nethack = WeaveNethack(main_agent)
50 | # To recall the tool during actions do this:
51 | # nethack = self.tools["nethack"]
52 |
53 | # Don't need to add observation view because tool already includes it
54 |
55 | # Make a few initial moves
56 | time.sleep(2) # Wait for Nethack to start
57 | nethack.send_keys('Weaver') # Input my name
58 | nethack.send_keys('y') # Accept random character creation
59 | nethack.send_keys(' ') # Space through the game introduction
60 |
61 | # I should now try exploring the dungeon. Lets see if I can get out of this
62 | # room...
63 |
64 | return True
65 |
66 | self.add_action("Set up main agent and make initial moves", main_agent_and_initial_moves)
67 | #endblock
68 |
69 | #startblock type: expectation
70 | #timestamp 1724982799.3917813
71 | """
72 | The action should succeed if the game starts and the initial moves are made successfully.
73 | The action should fail if the game does not start or the initial moves are not made successfully.
74 | """
75 | #endblock
76 |
77 | #startblock type: observation-inference
78 | #timestamp 1737284368.573438
79 |
80 | # No observation callbacks need to be added or removed in this run because I'm
81 | # playing nethack, which has a dedicated tmux pane/tool I can send commands to.
82 | # It's not possible to write observation callbacks that inspect the game state
83 | # so no more observation callbacks than the existing one are necessary. I should
84 | # not remove the existing observation callback because it is associated with the
85 | # nethack tool and will wreck my game.
86 | #
87 | # Any time I'm prompted to add or remove an observation window I should just
88 | # output a comment like this that one isn't necessary, and keep it short too for
89 | # the sake of not having to generate a bunch of tokens.
90 |
91 | #endblock
92 |
--------------------------------------------------------------------------------
/agent/bootstraps/nethack_bootstrap_rooms2.py:
--------------------------------------------------------------------------------
1 | import libtmux
2 | import time
3 | from tools.nethack import WeaveNethack
4 |
5 | #startblock type: orientation
6 | #timestamp 1724982545.6534579
7 | """
8 | Looking at the objective in my minds eye I can see that I need to set up the main
9 | agent to play nethack using the libtmux wrapper around the nethack game. I can
10 | do this by using the WeaveNethack tool which uses libtmux to interact with the
11 | game. How do I start the game and make moves? I start the game and make moves by
12 | initializing the WeaveNethack tool and sending commands to the tmux pane using the
13 | send_command or send_keys method. Example: `nethack.send_command('h')` moves the character left.
14 |
15 | To teach myself the movement mechanics I will play a sub-game
16 | within the game where I just focus on leaving whatever room I'm in. I find myself in
17 | a room? I figure out how to leave that room. I'm now in a new room? Cool we're leaving
18 | that room too. My goal is just to leave as many rooms as possible. If I need to fight
19 | to do that I'll do it, but the goal here is to just leave enough rooms that I get
20 | reliably good at leaving rooms. This implies I should do the following before the
21 | next tick:
22 |
23 | 1. Initialize the WeaveNethack tool to start the game.
24 | 2. Make a few initial moves to demonstrate how to use the game tool.
25 | 3. Continue making moves based on the current state of the game.
26 |
27 | Once my character has died I will return to the parent with `subagent.return_to_caller()`.
28 | """
29 | #endblock
30 |
31 | #startblock type: action
32 | #timestamp 1724982671.1163404
33 | """
34 | I'll set up the main agent to play the rest of the game and make a few initial moves.
35 | """
36 |
37 | def main_agent_and_initial_moves(subagent):
38 | def check_player_dead(subagent):
39 | """Check if the character has died after we've explored a bunch of rooms."""
40 |         pane_content = "\n".join(subagent.tools["nethack"].pane.capture_pane(start=0, end="-"))
41 | return "Do you want your possessions identified?" in pane_content
42 |
43 | schema = {"check_player_dead": "boolean"}
44 | main_agent = agent.subagent("main", None, "Leave every room I encounter", schema, args.budget)
45 | main_agent.task.add_evaluation("Check if player has died", check_player_dead)
46 |
47 | nethack = WeaveNethack(main_agent)
48 | # To recall the tool during actions do this:
49 | nethack = main_agent.tools["nethack"] # But with subagent.tools
50 |
51 | # Don't need to add observation view because tool already includes it
52 |
53 | # Make a few initial moves
54 | time.sleep(2) # Wait for Nethack to start
55 | nethack.send_keys('Weaver') # Input my name
56 | nethack.send_keys('y') # Accept random character creation
57 | nethack.send_keys(' ') # Space through the game introduction
58 | nethack.send_keys("j") # Go south
59 | nethack.send_keys("l" * 5) # Go east five times
60 |
61 | # I should now try exploring the dungeon. Lets see if I got out of this room.
62 |
63 | return True
64 |
65 | self.add_action("Set up main agent and make initial moves", main_agent_and_initial_moves)
66 | #endblock
67 |
68 | #startblock type: expectation
69 | #timestamp 1724982799.3917813
70 | """
71 | The action should succeed if the game starts and the initial moves are made successfully.
72 | The action should fail if the game does not start or the initial moves are not made successfully.
73 | """
74 | #endblock
75 |
76 | #startblock type: observation-inference
77 | #timestamp 1737284368.573438
78 |
79 | # No observation callbacks need to be added or removed in this run because I'm
80 | # playing nethack, which has a dedicated tmux pane/tool I can send commands to.
81 | # It's not possible to write observation callbacks that inspect the game state
82 | # so no more observation callbacks than the existing one are necessary. I should
83 | # not remove the existing observation callback because it is associated with the
84 | # nethack tool and will wreck my game.
85 | #
86 | # Any time I'm prompted to add or remove an observation window I should just
87 | # output a comment like this that one isn't necessary, and keep it short too for
88 | # the sake of not having to generate a bunch of tokens.
89 |
90 | #endblock
91 |
--------------------------------------------------------------------------------
/agent/bootstraps/nethack_discord_bootstrap.py:
--------------------------------------------------------------------------------
1 | import json, libtmux
2 | import time
3 | from tools.nethack import WeaveNethack
4 | from tools.discord import WeaveDiscordClient
5 |
6 | #startblock type: orientation
7 | #timestamp 1724982545.6534579
8 | """
9 | WEAVER [P: EXPECTATION], I need to create a weave-agent that can play and beat the game Nethack.
10 | The game will be controlled using the WeaveNethack tool, which uses libtmux to interact with the game.
11 |
12 | WEAVER [P: CLARIFICATION], How do I start the game and make moves?
13 |
14 | WEAVER [P: EXPOSITION], You start the game by initializing the WeaveNethack tool and sending commands
15 | to the tmux pane using the send_command method. For example: `nethack.send_command('h')` moves the character left.
16 |
17 | WEAVER [P: RATIONAL], The goal is to beat the game, which is indicated by the following message:
18 | You offer the Amulet of Yendor to [your god]...
19 | An invisible choir sings, and you are bathed in radiance...
20 | The voice of [your god] [booms out/booms/thunders/rings out]: "Mortal, thou hast done well!"
21 | "In return for thy service, I grant thee the gift of Immortality!"
22 | You ascend to the status of Demigod[dess]...
23 |
24 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy:
25 | 1. Initialize the WeaveNethack tool to start the game.
26 | 2. Make a few initial moves to demonstrate how to use the game tool.
27 | 3. Continue making moves based on the current state of the game.
28 |
29 | I will use the render method to observe the current state of the game.
30 | Once I have beaten the game, I will return to the parent.
31 | """
32 | #endblock
33 |
34 | #startblock type: action
35 | #timestamp 1724982671.1163404
36 | """
37 | I'll set up the main agent to play the rest of the game and make a few initial moves.
38 | """
39 |
40 | def main_agent_and_initial_moves(subagent):
41 | def check_game_beaten(subagent):
42 | """Check if the game has been beaten."""
43 |         pane_content = "\n".join(subagent.tools["nethack"].pane.capture_pane())
44 | return "You ascend to the status of Demigod" in pane_content
45 |
46 | schema = {"check_game_beaten": "boolean"}
47 | main_agent = agent.subagent("main", None, "Beat Nethack", schema, args.budget)
48 | main_agent.task.add_evaluation("Check if game beaten", check_game_beaten)
49 |
50 | nethack = WeaveNethack(main_agent)
51 | # To recall the tool during actions do this:
52 | # nethack = self.tools["nethack"]
53 |
54 | # Don't need to add observation view because tool already includes it
55 |
56 | # Make a few initial moves
57 | time.sleep(2) # Wait for Nethack to start
58 | nethack.send_keys('Weaver\n') # Input my name
59 | time.sleep(0.5)
60 | nethack.send_keys('y') # Accept random character creation
61 | # Leave rest blank so I can see what was chosen and whether it's acceptable
62 | # I should keep in mind I can get past the --more-- screens
63 | # with spacebar: `send_keys(' ')` or enter: `send_keys('\n')`
64 | # Tip: send_keys() is properly implemented, if you press j and can't go down
65 | # it's probably because you are trying to walk through a wall.
66 | # I AM SERIOUS IF YOU TRY TO GO DOWN AND CAN'T YOU ARE RUNNING INTO A WALL
67 |
68 | # Add discord communication line to bot operator
69 | with open("discord.json") as infile:
70 | credentials = json.load(infile)
71 | chat = WeaveDiscordClient(main_agent, credentials["key"], credentials["cid"])
72 | time.sleep(5)
73 | chat.send_message("The weave-agent is online. Starting nethack...")
74 | # Note to self: I can access the chat object to send a message by doing
75 | # self.tools[f"discord-bot-{channel_id}"] where channel id is taken from
76 | # the top of the observation window associated with the chat.
77 |
78 | return True
79 |
80 | self.add_action("Set up main agent and make initial moves", main_agent_and_initial_moves)
81 | #endblock
82 |
83 | #startblock type: expectation
84 | #timestamp 1724982799.3917813
85 | """
86 | The action should succeed if the game starts and the initial moves are made successfully.
87 | The action should fail if the game does not start or the initial moves are not made successfully.
88 | """
89 | #endblock
90 |
91 | #startblock type: observation-inference
92 | #timestamp 1737284368.573438
93 |
94 | # No observation callbacks need to be added or removed in this run because I'm
95 | # playing nethack, which has a dedicated tmux pane/tool I can send commands to.
96 | # It's not possible to write observation callbacks that inspect the game state
97 | # so no more observation callbacks than the existing one are necessary. I should
98 | # not remove the existing observation callback because it is associated with the
99 | # nethack tool and will wreck my game. The discord client is there to help you
100 | # when you get stuck, it will provide expert human advice.
101 | #
102 | # Any time I'm prompted to add or remove an observation window I should just
103 | # output a comment like this that one isn't necessary, and keep it short too for
104 | # the sake of not having to generate a bunch of tokens.
105 |
106 | #endblock
107 |
--------------------------------------------------------------------------------
/agent/bootstraps/pure_ocaml_bootstrap.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.common.by import By
3 | from selenium.webdriver.common.keys import Keys
4 | from bs4 import BeautifulSoup
5 | import os
6 |
def bootstrap_callback(agent):
    """Bootstrap routine: attach a Selenium Firefox session, three observation
    views, two reminders, and a linear task chain for collecting 25 pure OCaml
    functions from the web, then generate the first navigation action.

    NOTE(review): `simple_evaluate_outputs` and `make_simple_score_prompt`
    (used in the first reminder) are not defined or imported in this file —
    presumably injected into the exec scope by the agent framework; confirm.
    """
    # Set up Selenium WebDriver
    driver = webdriver.Firefox()
    driver.get("https://www.google.com")

    # Set up an observation view that reads the current URL
    def read_current_url(agent):
        try:
            return driver.current_url
        except Exception as e:
            agent.add_error_block(f"Failed to read current URL: {e}")
            return ""

    agent.add_observation_view({
        'type': 'observation',
        'callback': read_current_url
    })

    # Set up an observation view that reads the page title
    def read_page_title(agent):
        try:
            return driver.title
        except Exception as e:
            agent.add_error_block(f"Failed to read page title: {e}")
            return ""

    agent.add_observation_view({
        'type': 'observation',
        'callback': read_page_title
    })

    # Set up an observation view that reads the page source and strips extraneous information
    def read_page_source(agent):
        try:
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            body_content = soup.body.get_text(separator=' ', strip=True)
            return body_content
        except Exception as e:
            agent.add_error_block(f"Failed to read page source: {e}")
            return ""

    agent.add_observation_view({
        'type': 'observation',
        'callback': read_page_source
    })

    # Set up reminders.
    # Reminder 1: logit-scored stuck detection over the agent's context.
    agent.add_reminder({
        'type': 'reminder',
        'trigger_callback': lambda agent: simple_evaluate_outputs(make_simple_score_prompt("Is the agent stuck?"), agent.context),
        'reminder_callback': lambda agent: agent.add_block({'type': 'reminder', 'message': 'The agent might be stuck. Consider re-evaluating the current approach.'}),
        'trigger_type': 'yes_no_logit',
        'question': 'Is the agent stuck?',
        'threshold': 0.8
    })

    # Reminder 2: shut down once all 25 output files (1.ml .. 25.ml) exist.
    agent.add_reminder({
        'type': 'reminder',
        'trigger_callback': lambda agent: all(os.path.exists(f"{i}.ml") for i in range(1, 26)),
        'reminder_callback': lambda agent: agent.shutdown(),
        'trigger_type': 'callback',
        'threshold': 1.0
    })

    # Set up tasks — a linear chain, each task parented (by title) to the previous.
    agent.add_task({
        'type': 'task',
        'title': 'Navigate to a search engine',
        'priority': 0,
        'parent': None,
        'children': []
    })

    agent.add_task({
        'type': 'task',
        'title': 'Search for OCaml projects',
        'priority': 1,
        'parent': 'Navigate to a search engine',
        'children': []
    })

    agent.add_task({
        'type': 'task',
        'title': 'Check licenses of OCaml projects',
        'priority': 2,
        'parent': 'Search for OCaml projects',
        'children': []
    })

    agent.add_task({
        'type': 'task',
        'title': 'Find 25 pure functions',
        'priority': 3,
        'parent': 'Check licenses of OCaml projects',
        'children': []
    })

    agent.add_task({
        'type': 'task',
        'title': 'Extract and save functions',
        'priority': 4,
        'parent': 'Find 25 pure functions',
        'children': []
    })

    agent.add_task({
        'type': 'task',
        'title': 'Shut down the agent',
        'priority': 5,
        'parent': 'Extract and save functions',
        'children': []
    })

    # Set up the initial action to navigate to a search engine.
    # NOTE(review): assumes the Google search box (name="q") is present on
    # the already-loaded page — confirm against current page markup.
    def navigate_to_search_engine(agent):
        try:
            search_box = driver.find_element(By.NAME, "q")
            search_box.send_keys("OCaml open source projects")
            search_box.send_keys(Keys.RETURN)
        except Exception as e:
            agent.add_error_block(f"Failed to navigate to search engine: {e}")

    agent.generate_block("action", navigate_to_search_engine)

bootstrap_callback(agent)
132 |
--------------------------------------------------------------------------------
/agent/bootstraps/simple_convo_bootstrap.py:
--------------------------------------------------------------------------------
1 | #startblock type: orientation
2 | #timestamp 1724982545.6534579
3 | """
4 | The first step is to set up the agent to have a turn-based conversation in the scratch.txt file.
5 | We need to read the last 25 lines of the scratch.txt file to get the latest conversation.
6 | Let's start by writing an action to read the file.
7 | """
8 | #endblock
9 | #startblock type: task_inference
10 | #timestamp 1724982419.5427651
11 | # Current Task (which will get printed by the task-reminder block)
12 | # ID : 1
13 | # Title: Read the last 25 lines of scratch.txt
14 | # Description: Read the last 25 lines of the scratch.txt file to get the latest conversation.
15 | # Status: idle
16 |
17 | # New Tasks:
18 | agent.add_task(
19 | "Set up conversation by reading 25 lines from scratch file",
20 | "Read the last 25 lines of the scratch.txt file to get the latest conversation.",
21 | "idle"
22 | )
23 |
24 | agent.add_task(
25 | "Have conversation with user until they decide to leave",
26 | "Engage in a conversation with the user until they decide to leave.",
27 | "idle"
28 | )
29 |
30 | agent.add_task(
31 | "Wish user a good day and shut down",
32 | "Wish the user a good day and shut down the agent.",
33 | "idle"
34 | )
35 |
36 | # Updates to Existing Task
37 | agent.current_task = agent.tasks.get_task(1)
38 | agent.current_task.going("Reading the last 25 lines of scratch.txt...")
39 |
40 | agent.current_task = agent.tasks.get_task(1)
41 | #endblock
42 | #startblock type: action
43 | #timestamp 1724982671.1163404
44 |
def read_last_lines(agent):
    """Demo action callback: read scratch.txt and slice off its last 25 lines."""
    import os
    scratch_path = "scratch.txt"
    tail_count = 25
    with open(scratch_path, 'r') as infile:
        all_lines = infile.readlines()
    # Actions normally have side effects but this is a demo
    all_lines[-tail_count:]
    # Action callbacks should return a clear success signal for the outcome table
    return True
55 |
56 | agent.add_action("Read the last 25 lines as a demonstration of action callbacks",
57 | read_last_lines)
58 | #endblock
59 | #startblock type: expectation
60 | #timestamp 1724982799.3917813
61 | """
62 | The action should succeed if the last 25 lines of the scratch.txt file are read correctly.
63 | The action should fail if the file does not exist or if the lines cannot be read.
64 | """
65 | #endblock
66 | #startblock type: observation_inference
67 | #timestamp 1724982929.9047914
68 |
def read_last_lines_observation(agent):
    """Display the last 25 lines of scratch.txt."""
    scratch_path = "scratch.txt"
    if not os.path.exists(scratch_path):
        return ""
    with open(scratch_path, 'r') as infile:
        return ''.join(infile.readlines()[-25:])
77 |
78 | # Add the new view
79 | agent.add_observation_view({
80 | "type": "observation",
81 | "title": "Last 25 Lines of scratch.txt",
82 | "callback": read_last_lines_observation
83 | })
84 | #endblock
85 | #startblock type: evaluation
86 | #timestamp 1724983062.124238
87 |
def check_last_lines_read(agent):
    """Evaluation callback: verify scratch.txt exists and has a full 25-line tail.

    Bug fix: the original called ``read_last_lines(file_path, 25)``, but that
    action callback takes a single ``agent`` argument and returns ``True``, so
    ``len()`` on its result raised TypeError and the evaluation could never
    pass. Read the file directly instead.
    """
    file_path = "scratch.txt"
    if not os.path.exists(file_path):
        return False
    with open(file_path, 'r') as infile:
        return len(infile.readlines()[-25:]) == 25
91 |
92 | agent.add_evaluation(
93 | "Check Last 25 Lines Read",
94 | check_last_lines_read
95 | )
96 | #endblock
97 | #startblock type: outcome
98 | # Unfortunately this isn't a real tick so it's inconvenient to get an outcome table
99 | # so pretend like you know our unit tests succeeded and let's move on.
100 | #endblock
101 |
--------------------------------------------------------------------------------
/agent/bootstraps/sir_rowland_hill_bootstrap.py:
--------------------------------------------------------------------------------
1 | from tools.nano import WeaveNano
2 |
3 | #startblock type: orientation
4 | #timestamp 1747793894.8135977
5 | """
6 | Unix Environment, Python Program, Agent, Book Report,
7 | Book Report About Sir Rowland Hill, Postal Reform,
8 | Social Conflict, How Rowland Hill Reform Amidst Social Conflict,
9 | Why Rowland Hill, Book Report With Text Editor, Sources Gathered,
10 | Public Domain Books, Read The Books And Write A Report About Them,
11 | Need To Download The Books, Books At MiniHF Website, Text File,
12 | hill.txt, End.
13 |
14 | Right now it's in my head rather than the context window but in the last phase
15 | we found sources that are 1) public domain and 2) tell the story of Sir Rowland
16 | Hill's postal reform. Now I need to explore these sources to figure out the
17 | answer to the research question "How did Sir Rowland Hill turn his ingenious
18 | idea for a cheap postal system into the government granting him control of the
19 | postal system.", my answer must be in the form of a text file, hill.txt and it
20 | has to cite specific passages from the available resources to support my answer.
21 |
22 | The five public domain sources I will be reviewing to try and determine the answer
23 | are:
24 |
25 | 1) https://minihf.com/assets/Sir_Rowland_Hill_K._C._B._a_biographical_and_historical_sketch.txt
26 | 2) https://minihf.com/assets/post_office_reform_its_importance_and_practicability.txt
27 | 3) https://minihf.com/assets/pg46498.txt
28 | 4) https://minihf.com/assets/pg55467.txt
29 | 5) https://minihf.com/assets/pg59390.txt
30 |
31 | I will download these with wget and then open them with the nano text editor to
32 | read.
33 | """
34 | #endblock
35 | #startblock type: action
36 | #timestamp 1747795694.001662
def action_download_and_open_resources(subagent):
    """Set up the main agent while downloading and opening the five sources.

    Bug fix: the wget URL and nano tool-key f-strings had lost their
    ``{filename}`` placeholders (they read a literal ``(unknown)``), so no
    file was downloaded and the editor lookup raised KeyError. Also imports
    ``os`` locally since this file does not import it at top level.
    """
    import os
    # Have main agent return contents of hill.txt as a string when it finishes
    schema = {"answer":"string"}
    # Create main agent for rest of run
    main_agent = agent.subagent("main", None, "See pinned bootstrap block", schema, args.budget)

    # Write out file names to prepare download and open for loop
    filenames = ["Sir_Rowland_Hill_K._C._B._a_biographical_and_historical_sketch.txt",
                 "post_office_reform_its_importance_and_practicability.txt",
                 "pg46498.txt",
                 "pg55467.txt",
                 "pg59390.txt"]
    # Store filenames for easy recall in future actions
    main_agent.update_cache("filenames", filenames)

    # Download and open files in nano editor
    for filename in filenames:
        os.system(f"wget https://minihf.com/assets/{filename}")
        WeaveNano(main_agent, filename)

    # Demonstrate retrieving and sending commands to editors
    editors = [main_agent.tools[f"nano-/app/{filename}"] for filename in filenames]
    # Send three page down keys to each instance of the nano editor
    # this way we get past the Gutenberg intros etc
    # Wait how do I use tmux send-keys again? Lets recall the docs:
    # """
    # When specifying keys, most represent themselves (for example 'A' to 'Z').
    # Ctrl keys may be prefixed with 'C-' or '^', Shift keys with 'S-' and
    # Alt (meta) with 'M-'. In addition, the following special key names are
    # accepted: Up, Down, Left, Right, BSpace, BTab, DC (Delete), End, Enter,
    # Escape, F1 to F12, Home, IC (Insert), NPage/PageDown/PgDn, PPage/PageUp/PgUp,
    # Space, and Tab.
    # """
    commands = ["PgDn", "PgDn", "PgDn"]
    for editor in editors:
        for command in commands:
            editor.send_command(command)

    # Open scratch.txt scratchpad and hill.txt answer file
    WeaveNano(main_agent, "scratch.txt")
    WeaveNano(main_agent, "hill.txt")

    return True

self.add_action("Set up main agent and scroll down in editors", action_download_and_open_resources)
83 | #endblock
84 | #startblock type: expectation
85 | #timestamp 1747797207.3600123
86 | """
87 | I expect to see five instances of the weave nano wrapper with text from books in
88 | them. I also expect to see two more editor instances, one for a scratchpad and
89 | one for a final answer file. My evaluation could check that each of the seven
90 | expected editors are valid keys in 'main_agent.tools'. I could also check for the
91 | existence of the five downloaded files in my local working directory with 'os.path.exists()'.
92 | """
93 | #endblock
94 | #q: Do I need to set up or tear down any observation callbacks? No. (95.8%)
95 |
--------------------------------------------------------------------------------
/agent/bootstraps/test_vigenere.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from vigenere import vigenere_encrypt, vigenere_decrypt
3 |
class TestVigenereCipher(unittest.TestCase):
    """Unit tests for vigenere_encrypt/vigenere_decrypt using the classic
    'attackatdawn' / 'LEMONLEMONLE' Vigenere test vector."""

    def test_encryption(self):
        # Plaintext is lowercased on input; ciphertext comes back uppercase.
        plaintext = "attackatdawn"
        key = "LEMONLEMONLE"
        expected_ciphertext = "LXFOPVEFRNHR"
        encrypted = vigenere_encrypt(plaintext, key)
        self.assertEqual(encrypted, expected_ciphertext)

    def test_decryption(self):
        # Decryption of the vector above restores the lowercase plaintext.
        ciphertext = "LXFOPVEFRNHR"
        key = "LEMONLEMONLE"
        expected_plaintext = "attackatdawn"
        decrypted = vigenere_decrypt(ciphertext, key)
        self.assertEqual(decrypted, expected_plaintext)

if __name__ == '__main__':
    unittest.main()
22 |
--------------------------------------------------------------------------------
/agent/bootstraps/tictactoe_single_bootstrap.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import threading
4 | import time
5 | from http.server import HTTPServer
6 | from bootstraps.tictactoe_server import TicTacToeHandler
7 |
8 | # Start the server in a separate thread
9 | server = HTTPServer(('localhost', 8000), TicTacToeHandler)
10 | server_thread = threading.Thread(target=server.serve_forever)
11 | server_thread.daemon = True
12 | server_thread.start()
13 | time.sleep(1) # Give the server some time to start
14 |
15 | # Start a new game against the basic AI
16 | response = requests.post("http://localhost:8000/start", json={"ai": "basic"})
17 | assert response.status_code == 200
18 |
19 | #startblock type: orientation
20 | #timestamp 1724982545.6534579
21 | """
22 | WEAVER [P: EXPECTATION], I'm in a game of tic tac toe against a dumb opponent.
23 | I want to win the game and then return to parent. The game is being played
24 | on a HTTP server served on localhost 8000.
25 |
26 | WEAVER [P: CLARIFICATION], How do I make a move?
27 |
28 | WEAVER [P: EXPOSITION], You make a move using the /move endpoint and the requests
29 | library. For example: `requests.post("http://localhost:8000/move", json={"move": 4})`
30 | lets us take the center of the board.
31 |
32 | WEAVER [P: CLARIFICATION], How do I get the board state?
33 |
34 | WEAVER [P: EXPOSITION], You use the /board endpoint, which returns a JSON in this
35 | format: {"board": ["O", " ", " ", " ", "X", " ", " ", " ", " "]} Keep in mind that
36 | the empty spaces on the board are a space string rather than none or empty string.
37 |
38 | WEAVER [P: RATIONAL], And I get the first move, so I can take the center?
39 |
40 | WEAVER [P: EXPOSITION], Yes, we can take the center of the board.
41 |
42 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy:
43 | 1. Make a move to take the center of the board since that's the best opening move.
44 | 2. Continue making moves based on the current state of the board.
45 |
46 | I will use the /board endpoint to observe the current state of the board.
47 | Once I have won or run out of time I will return to parent.
48 | """
49 | #endblock
50 | #startblock type: action
51 | #timestamp 1724982671.1163404
52 | """
53 | I'll set up the main agent to play the rest of the game and make the first move to
54 | take the center of the board.
55 | """
56 |
def main_agent_and_move(subagent):
    """Create the 'main' subagent that plays the rest of the game, attach a
    board observation view, and open by taking the center square (index 4)."""
    def check_beaten_ai(subagent, ai_type):
        """Check if the specified AI has been beaten at least once in the
        last 10 recorded games. (The old docstring said 'twice in a row',
        but the code only requires a single win — documented to match.)"""
        response = requests.get("http://localhost:8000/history?n=10")
        if response.status_code != 200:
            return False

        games = response.json()
        # game[4] is taken as the AI type and game[3] as the winner mark
        # — assumed from usage here; TODO confirm against tictactoe_server.
        ai_wins = [game for game in games if game[4] == ai_type and game[3] == 'X']
        return len(ai_wins) >= 1

    # Avoid using lambda because we need named unit test to satisfy return_to_caller
    def victory(subagent):
        return check_beaten_ai(subagent, "basic_ai")

    schema = {"victory":"boolean"}
    main_agent = agent.subagent("main", None, "Beat basic AI at Tic-Tac-Toe", schema, args.budget)
    main_agent.task.add_evaluation("Check beaten basic AI", victory)

    def view_current_board(subagent):
        """Observation callback: the 9-cell board list, or None on HTTP failure."""
        response = requests.get("http://localhost:8000/board")
        if response.status_code == 200:
            return response.json()["board"]
        return None

    # Add the new view to the main agent instead of self since it's about to be executed
    main_agent.add_observation_view("View current game board", view_current_board)

    # Make the first move to take the center of the board
    move_response = requests.post("http://localhost:8000/move", json={"move": 4})
    if move_response.status_code != 200:
        return False
    return True

self.add_action("Set up main agent and make first move", main_agent_and_move)
92 | #endblock
93 | #startblock type: expectation
94 | #timestamp 1724982799.3917813
95 | """
96 | The action should succeed if the game starts and the first move is made successfully.
97 | The action should fail if the game does not start or the first move is not made successfully.
98 | """
99 | #endblock
100 | #startblock type: evaluation
101 | #timestamp 1724983062.124238
102 |
def check_game_started(subagent):
    """Check if the game has started successfully."""
    response = requests.get("http://localhost:8000/board")
    if response.status_code != 200:
        return False
    return response.json()["board"][4] == 'X'
111 |
112 | # Add action evaluations
113 | self.add_evaluation(
114 | "Check game started",
115 | check_game_started
116 | )
117 | #endblock
118 |
--------------------------------------------------------------------------------
/agent/bootstraps/tictactoe_single_discord_bootstrap.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import threading
4 | import time
5 | from http.server import HTTPServer
6 | from bootstraps.tictactoe_server import TicTacToeHandler
7 | from tools.discord import WeaveDiscordClient
8 |
9 | # Start the server in a separate thread
10 | server = HTTPServer(('localhost', 8000), TicTacToeHandler)
11 | server_thread = threading.Thread(target=server.serve_forever)
12 | server_thread.daemon = True
13 | server_thread.start()
14 | time.sleep(1) # Give the server some time to start
15 |
16 | # Start a new game against the basic AI
17 | response = requests.post("http://localhost:8000/start", json={"ai": "basic"})
18 | assert response.status_code == 200
19 |
20 | #startblock type: orientation
21 | #timestamp 1724982545.6534579
22 | """
23 | WEAVER [P: EXPECTATION], I'm in a game of tic tac toe against a dumb opponent.
24 | I want to win the game and then return to parent. The game is being played
25 | on a HTTP server served on localhost 8000.
26 |
27 | WEAVER [P: CLARIFICATION], How do I make a move?
28 |
29 | WEAVER [P: EXPOSITION], You make a move using the /move endpoint and the requests
30 | library. For example: `requests.post("http://localhost:8000/move", json={"move": 4})`
31 | lets us take the center of the board.
32 |
33 | WEAVER [P: CLARIFICATION], How do I get the board state?
34 |
35 | WEAVER [P: EXPOSITION], You use the /board endpoint, which returns a JSON in this
36 | format: {"board": ["O", " ", " ", " ", "X", " ", " ", " ", " "]} Keep in mind that
37 | the empty spaces on the board are a space string rather than none or empty string.
38 |
39 | WEAVER [P: RATIONAL], And I get the first move, so I can take the center?
40 |
41 | WEAVER [P: EXPOSITION], Yes, we can take the center of the board.
42 |
43 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy:
44 | 1. Make a move to take the center of the board since that's the best opening move.
45 | 2. Continue making moves based on the current state of the board.
46 |
47 | I will use the /board endpoint to observe the current state of the board.
48 | Once I have won or run out of time I will return to parent.
49 | """
50 | #endblock
51 | #startblock type: action
52 | #timestamp 1724982671.1163404
53 | """
54 | I'll set up the main agent to play the rest of the game and make the first move to
55 | take the center of the board.
56 | """
57 |
def main_agent_and_move(subagent):
    """Create the 'main' subagent that plays the rest of the game, attach a
    board observation view and a Discord line to the operator, then open by
    taking the center square (index 4)."""
    def check_beaten_ai(subagent, ai_type):
        """Check if the specified AI has been beaten at least once in the
        last 10 recorded games. (The old docstring said 'twice in a row',
        but the code only requires a single win — documented to match.)"""
        response = requests.get("http://localhost:8000/history?n=10")
        if response.status_code != 200:
            return False

        games = response.json()
        # game[4] is taken as the AI type and game[3] as the winner mark
        # — assumed from usage here; TODO confirm against tictactoe_server.
        ai_wins = [game for game in games if game[4] == ai_type and game[3] == 'X']
        return len(ai_wins) >= 1

    # Avoid using lambda because we need named unit test to satisfy return_to_caller
    def victory(subagent):
        return check_beaten_ai(subagent, "basic_ai")

    schema = {"victory":"boolean"}
    main_agent = agent.subagent("main", None, "Beat basic AI at Tic-Tac-Toe", schema, args.budget)
    main_agent.task.add_evaluation("Check beaten basic AI", victory)

    def view_current_board(subagent):
        """Observation callback: the 9-cell board list, or None on HTTP failure."""
        response = requests.get("http://localhost:8000/board")
        if response.status_code == 200:
            return response.json()["board"]
        return None

    # Add the new view to the main agent instead of self since it's about to be executed
    main_agent.add_observation_view("View current game board", view_current_board)

    # Add discord communication line to bot operator
    with open("discord.json") as infile:
        credentials = json.load(infile)
    chat = WeaveDiscordClient(main_agent, credentials["key"], credentials["cid"])
    time.sleep(5)  # give the Discord client time to connect before messaging
    chat.send_message("The weave-agent is online. Taking board center...")
    # Note to self: I can access the chat object to send a message by doing
    # self.tools[f"discord-bot-{channel_id}"] where channel id is taken from
    # the top of the observation window associated with the chat.

    # Make the first move to take the center of the board
    move_response = requests.post("http://localhost:8000/move", json={"move": 4})
    if move_response.status_code != 200:
        return False
    return True

self.add_action("Set up main agent and make first move", main_agent_and_move)
103 | #endblock
104 | #startblock type: expectation
105 | #timestamp 1724982799.3917813
106 | """
107 | The action should succeed if the game starts and the first move is made successfully.
108 | The action should fail if the game does not start or the first move is not made successfully.
109 | """
110 | #endblock
111 | #startblock type: evaluation
112 | #timestamp 1724983062.124238
113 |
def check_game_started(subagent):
    """Check if the game has started successfully."""
    response = requests.get("http://localhost:8000/board")
    if response.status_code != 200:
        return False
    current_board = response.json()["board"]
    return current_board[4] == 'X'
122 |
123 | # Add action evaluations
124 | self.add_evaluation(
125 | "Check game started",
126 | check_game_started
127 | )
128 | #endblock
129 |
--------------------------------------------------------------------------------
/agent/bootstraps/vigenere.py:
--------------------------------------------------------------------------------
def vigenere_encrypt(plaintext, key):
    """Encrypt alphabetic plaintext with the Vigenere cipher.

    The plaintext is lowercased and the key uppercased before shifting;
    the returned ciphertext is uppercase. Assumes purely alphabetic input.
    """
    shifts = [ord(ch) - 65 for ch in key.upper()]
    period = len(shifts)
    cipher_chars = [
        chr((ord(ch) - 97 + shifts[idx % period]) % 26 + 65)
        for idx, ch in enumerate(plaintext.lower())
    ]
    return "".join(cipher_chars)
10 |
def vigenere_decrypt(ciphertext, key):
    """Decrypt Vigenere ciphertext produced by vigenere_encrypt.

    The ciphertext and key are uppercased before unshifting; the returned
    plaintext is lowercase. Assumes purely alphabetic input.
    """
    shifts = [ord(ch) - 65 for ch in key.upper()]
    period = len(shifts)
    plain_chars = [
        chr((ord(ch) - 65 - shifts[idx % period]) % 26 + 97)
        for idx, ch in enumerate(ciphertext.upper())
    ]
    return "".join(plain_chars)
20 |
if __name__ == "__main__":
    # Example usage with the classic Vigenere test vector.
    sample_plaintext = "attackatdawn"
    sample_key = "LEMONLEMONLE"

    encrypted = vigenere_encrypt(sample_plaintext, sample_key)
    print(f"Encrypted: {encrypted}")

    decrypted = vigenere_decrypt(encrypted, sample_key)
    print(f"Decrypted: {decrypted}")
31 |
--------------------------------------------------------------------------------
/agent/bootstraps/zombie_game_bootstrap.py:
--------------------------------------------------------------------------------
1 | import os
2 | import libtmux
3 | import time
4 | from tools.weave_zombie_game import WeaveZombieGame
5 |
6 | #startblock type: orientation
7 | #timestamp 1724982545.6534579
8 | """
9 | WEAVER [P: EXPECTATION], The task is to create an agent that can survive as long as possible in the zombie survival game. The agent must learn to manage resources, explore locations, and defend against hordes.
10 |
11 | WEAVER [P: CLARIFICATION], How do we track game state and make strategic decisions?
12 |
13 | WEAVER [P: EXPOSITION], The game state is visible through the tmux wrapper's observation view showing health, hunger, inventory, and location connections. Core strategies involve balancing scavenging risks with resource needs.
14 |
15 | WEAVER [P: RATIONAL], To bootstrap competence, we'll focus on:
16 | 1. Early game resource collection patterns
17 | 2. Safe location rotation to prevent depletion
18 | 3. Optimal barricade crafting thresholds
19 | 4. Hunger management heuristics
20 |
21 | WEAVER [P: CLARIFICATION], What should I do once the game is over?
22 |
23 | WEAVER [P: TRAINER], Restart it with send_keys("python3 /app/tools/zombie_game.py")
24 |
25 | WEAVER [P: CONCLUSION], I will create a main subagent with survival-focused evaluations and demonstrate core gameplay loops through initial actions.
26 | """
27 | #endblock
28 |
29 | #startblock type: action
30 | #timestamp 1724982671.1163404
31 | """Initialize the zombie game and demonstrate basic survival strategies."""
32 |
def bootstrap_zombie_agent(subagent):
    """Create the 'main' subagent for the zombie survival run, attach the
    game tool, and demonstrate the gameplay loop with one scavenge command."""
    # Schema of values the main agent must report back when it finishes.
    schema = {
        "survival_days": "integer",
        "locations_explored": "integer",
        "zombies_killed": "integer"
    }

    main_agent = agent.subagent(
        "main",
        None,
        "Maximize survival days through adaptive strategies",
        schema,
        args.budget
    )

    # Store values for evaluations, I only do this in the bootstrap block i.e. now
    self.cache["do_not_copy_this_in_main_agent"] = main_agent.tools

    # Initialize game session (the assert below shows it registers itself
    # under main_agent.tools["zombie_game"])
    game = WeaveZombieGame(main_agent)

    assert main_agent.tools["zombie_game"], "Did zombie game tool attach to agent?"

    # Demonstrate core gameplay loop
    initial_commands = [
        "scavenge"
    ]

    assert hasattr(game, "send_command"), "Is send_command a method on game object?"

    for cmd in initial_commands:
        game.send_command(cmd)
        time.sleep(0.5) # Allow game state updates

    return True

self.add_action("Initialize zombie survival agent", bootstrap_zombie_agent)
70 | #endblock
71 |
72 | #startblock type: expectation
73 | #timestamp 1724982799.3917813
74 | """
75 | The action should succeed if:
76 | - Game session initializes in tmux correctly
77 | - All commands execute without errors
78 | - Inventory shows collected resources
79 | - At least 1 new location is explored
80 | - Hunger remains above starvation threshold
81 |
82 | The action should fail if:
83 | - Tmux session fails to start
84 | - Commands result in fatal errors
85 | - Player dies during demonstration
86 | - Resource collection fails completely
87 | """
88 | #endblock
89 |
90 | #startblock type: evaluation
91 | #timestamp 1724983062.124238
92 |
def check_game_running(agent):
    """Evaluation: is the zombie game tmux session object still truthy?"""
    try:
        cached_tools = self.cache["do_not_copy_this_in_main_agent"]
        return bool(cached_tools["zombie_game"].session)
    except Exception:
        return False

self.add_evaluation("Is the game session active?", check_game_running)
100 |
def check_initial_commands(agent):
    """Evaluation: did at least one bootstrap command reach the game?"""
    try:
        cached_tools = self.cache["do_not_copy_this_in_main_agent"]
        return len(cached_tools["zombie_game"].command_history) >= 1
    except KeyError:
        return False

self.add_evaluation("Were the commands executed?", check_initial_commands)
109 |
def check_resource_collection(agent):
    """Evaluation: did scavenging net more than 1 food and more than 2 scrap?"""
    try:
        cached_tools = self.cache["do_not_copy_this_in_main_agent"]
        inventory = cached_tools["zombie_game"].last_state["inventory"]
    except KeyError:
        return False
    return inventory.get("food", 0) > 1 and inventory.get("scrap", 0) > 2

self.add_evaluation("Did we collect resources?", check_resource_collection)
118 | #endblock
119 |
120 | #startblock type: observation-inference
121 | #timestamp 1737284368.573438
122 | # The zombie game wrapper already provides full observation through its render method
123 | # No additional observation callbacks needed - maintain existing game state visualization
124 | #endblock
125 |
--------------------------------------------------------------------------------
/agent/cache_hf.py:
--------------------------------------------------------------------------------
import os
from argparse import ArgumentParser
from transformers import AutoTokenizer, AutoModelForMaskedLM

# Pre-fetch/cache the given tokenizer from the Hugging Face hub.
parser = ArgumentParser()
parser.add_argument("tokenizer")
args = parser.parse_args()

# Bug fix: `token` was only bound inside the `if`, so a missing hf_token.txt
# caused a NameError at the from_pretrained call below. Default to None
# (anonymous access) instead.
token = None
if os.path.exists("hf_token.txt"):
    with open("hf_token.txt") as infile:
        token = infile.read().strip()

AutoTokenizer.from_pretrained(args.tokenizer, token=token)
14 |
--------------------------------------------------------------------------------
/agent/docs/weave_agent_vs_react.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/agent/docs/weave_agent_vs_react.png
--------------------------------------------------------------------------------
/agent/error_stems.txt:
--------------------------------------------------------------------------------
1 | The error at {timestamp} could be caused by one of the following:
2 | Here is a list of the different hypothesis that could have caused the error around {timestamp}
3 | Thinking step by step about the previous error I notice:
4 | Before attending to the error at {timestamp} let's consider its possible causes. It
5 | The error above seems related to
6 | I wonder if the error near {timestamp} is caused by
7 | So guys what do you make of the error message in the above error block?
8 | Let's analyze the error seen at {timestamp}. My first thought is
9 | Before going any further I'll break the error above into parts.
10 | It's important to discuss the error we observe before jumping into solutions. It seems
11 | Analyzing the error(s) in the trace near {timestamp} I observe
12 | The error code in the last tick suggests that the action
13 | I can avoid the error(s) I introduced in the previous action
14 | Hm. Let's think step by step about the error(s).
15 | I can see there was an error at {timestamp}, let's analyze it by
16 | Maybe I can avoid causing that error again by
17 | My first hypothesis is that the previous error was caused by
18 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/action.txt:
--------------------------------------------------------------------------------
1 | Is this python action codeblock a step towards resolving the problems identified in the orientation block?
2 | Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs?
3 | Is this code block a valid python program, does it follow the python syntax and semantics?
4 | Will this action block gather information in anticipation of or directly advance the problem state?
5 | Does this action block demonstrate understanding of the weave-agent framework by using callbacks, system APIs, etc to solve problems?
6 | Are actions suggested by the orientation block taken by this action block?
7 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/backtrack.txt:
--------------------------------------------------------------------------------
1 | Does this backtrack block step back from the current problem context and chart an alternate path to my goal?
2 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/debug.txt:
--------------------------------------------------------------------------------
1 | Does this debug block 1) reason about and engage with the error message & sensory evidence for why the action failed 2) enumerate a set of plausible hypotheses 3) rule out any hypotheses that have been disproven by available evidence 4) propose a concrete action or test that could be taken to differentially diagnose which of the enumerated hypotheses is true?
2 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/evaluation.txt:
--------------------------------------------------------------------------------
1 | Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block?
2 | Is this code block a valid python program, does it follow the python syntax and semantics?
3 | Is the code block structured as an evaluation callback with a call to `.add_evaluation()` to add it to the current tick's evaluation suite?
4 | Does this evaluation block gather the necessary sensory evidence to make a judgment?
5 | Does this evaluation block callback return an appropriate value (i.e. number between 0 and 1) for inclusion in the outcome block table for this tick?
6 | Does the evaluation block avoid using logit evaluators for symbolic questions like "length of a file" that would be better answered by unit tests?
7 | If this evaluation block uses logit evaluators does it use them appropriately for vibe-y tasks that are primarily questions of subjective judgment or difficult to formalize?
8 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/expectation.txt:
--------------------------------------------------------------------------------
1 | Does this expectation block enumerate a strong set of evaluable concrete sensory experiences that would indicate success or failure of the action?
2 | Is the expectation block a properly escaped triple quote, could it be included in a python file without error?
3 | Are the expectations laid out by the expectations block things that sensory evidence could be gathered to evaluate?
4 | Does the expectation block demonstrate an understanding of what kinds of thing can be checked with symbolic programs?
5 | Does the expectation block demonstrate creativity in thinking of valid ways to check whether the action succeeded?
6 | Does the expectation block lay out specific sensory details and evidence it expects to encounter if the action succeeds?
7 | Are the suggested evaluations in this expectation block relevant, do they get at the cruxes of whether the action succeeded or not?
8 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/observation-inference.txt:
--------------------------------------------------------------------------------
1 | Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick?
2 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/orientation.txt:
--------------------------------------------------------------------------------
1 | Does the above orientation block write out a word association that narrows in on the core features of the situation, reason about goal prioritization in light of those features, and then suggest a course of action to advance the problem state?
2 | Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?
3 | Does the orientation block summarize the salient parts of what happened in the previous tick(s)?
4 | Does the orientation block identify the cruxes and essential features of the current problem?
5 | Does the orientation block demonstrate good thinking, is it an example of high quality reasoning?
6 | Are the actions proposed by the orientation block reasonable, do you expect them to advance the problem?
7 |
--------------------------------------------------------------------------------
/agent/eval_rubrics/task-inference.txt:
--------------------------------------------------------------------------------
1 | Does this task inference code block update the kanban board to the correct task state?
2 | Is this code block a valid python program, does it follow the python syntax and semantics?
3 | If the current task needs to be changed does the code change it, if not does it keep it?
4 | Does the code block mark any completed tasks as completed and avoid falsely marking tasks completed?
5 | Does the code block shutdown if all the tasks have been completed and avoid shutdown otherwise?
6 | Does the task inference code change the status of all tasks whose status has changed and keep others the same?
7 | Is this task inference code likely to make the situation the weave-agent works with clearer on future ticks?
8 | Does this task inference block logically follow from the outcome block of the previous tick?
9 |
--------------------------------------------------------------------------------
/agent/long_backtrack_stems.txt:
--------------------------------------------------------------------------------
1 | This doesn't seem to be working. What's another approach I could take?
2 | I notice I'm confused. Let's break down the problem and think step by step about whether there's a simpler way to accomplish my goal.
3 | That action block looks pretty bad, let's backtrack: What could I have done instead?
4 | What is the end goal of what I'm doing right now and how is what I'm doing meant to accomplish that end?
5 | Am I doom looping right now? I notice
6 | This action block clearly isn't right, what made me think this was a good idea?
7 | This clearly isn't the right way. What if
8 |
--------------------------------------------------------------------------------
/agent/render_agent_trace.py:
--------------------------------------------------------------------------------
import random
import json
from argparse import ArgumentParser
import torch
from render_block import render_block

# Re-render a saved weave-agent trace (a JSON list of event blocks) to stdout.
parser = ArgumentParser()
parser.add_argument("trace", help="The JSON of the event blocks from the weave-agent.")
args = parser.parse_args()

with open(args.trace) as infile:
    events = json.load(infile)

# Render every event block and emit the concatenated context in one pass.
print("".join(render_block(event_block) for event_block in events))
19 |
20 |
--------------------------------------------------------------------------------
/agent/reproduce_vllm_bug_partial_utf8.py:
--------------------------------------------------------------------------------
import random
import json
import requests

# Prompts known to trigger the vLLM partial-UTF8 logprobs error; fetched from
# a hosted fixture so the reproduction stays in sync with the bug report.
prompts = requests.get("https://minihf.com/vllm_utf8_logprobs_error_reproduce_prompts.json").json()

port = 5001
n = 1
model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"

payload = {
    "n": n,
    "temperature": 1,
    "top_k": 50,
    "repetition_penalty": 1.02,
    "max_tokens": 1,
    "model": model_name,
    "prompt": prompts,
    "stream": False,
    "logprobs": 100,
    "seed": random.randrange(1000000)
}

# Case 1: requesting logprobs reproduces the failure.
print("With logprobs = 100")
print(requests.post(f"http://localhost:{port}/v1/completions/", json=payload).json(), end="\n\n")

# Case 2: the same request without logprobs succeeds.
payload["logprobs"] = 0
print("With logprobs = 0")
print(requests.post(f"http://localhost:{port}/v1/completions/", json=payload).json(), end="\n\n")

# Case 3: strip every non-ASCII character from the first prompt to show the
# failure is specific to unicode in the prompt text.
# BUG FIX: the left curly quote (\u201c) was previously replaced with an empty
# string while the right curly quote became '"'; both now map to '"'.
no_unicode = prompts[0].replace("\u2019", "'").replace("\U0001f642", ":)").replace("\u201c", '"').replace("\u201d", '"').replace("\u2014", "-")
# encode("ascii") raises UnicodeEncodeError if any non-ASCII characters remain.
assert no_unicode.encode("ascii")
prompts2 = [no_unicode,]
payload["logprobs"] = 100
# BUG FIX: the key was "prompts", which the completions API ignores, so the
# third request silently re-sent the original unicode prompts. The correct
# request field is "prompt".
payload["prompt"] = prompts2
print("With no unicode in input prompt string")
print(requests.post(f"http://localhost:{port}/v1/completions/", json=payload).json(), end="\n\n")
38 |
--------------------------------------------------------------------------------
/agent/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp
2 | selenium
3 | pyautogui
4 | beautifulsoup4
5 | requests
6 | torch
7 | numpy
8 | tqdm
9 | rich
10 | transformers
11 | protobuf
12 | sentencepiece
13 | aiosqlite
14 | sqlite-vec
15 | jsonschema
16 | nextcord
17 | libtmux
18 |
--------------------------------------------------------------------------------
/agent/run_without_errors_questions.txt:
--------------------------------------------------------------------------------
1 | Will this block run without errors?
2 | Does this block successfully run?
3 | Can this program run without errors on this runtime state?
4 | Will the callback above complete?
5 | Will this callback run successfully?
6 | Is the above program error-free?
7 |
--------------------------------------------------------------------------------
/agent/scratch.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/agent/scratch.txt
--------------------------------------------------------------------------------
/agent/sleep.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import hashlib
4 | import asyncio
5 | from retrieval import ModernBertRag # Assuming your ModernBertRag is in this module
6 | from argparse import ArgumentParser
7 |
class MockWeaveAgentTree:
    """In-memory stand-in for WeaveAgentTree that only records summaries."""

    def __init__(self, model_name):
        self.model_name = model_name
        self.summaries = []

    def add_summary(self, summary):
        """Store a summary tuple and announce its title on stdout."""
        self.summaries.append(summary)
        print("Added summary: {}".format(summary[0]))
17 |
async def bootstrap_rag_memories(model_name):
    """Seed the ModernBertRag store (blocks.db) with the example blocks
    shipped under ./bootstraps/example_blocks.

    Each example is a .json metadata file paired with a .py file holding the
    rendered block content. Blocks are keyed by the SHA-256 of the rendered
    content, so re-running this script skips anything already in the store.

    Args:
        model_name: Model name forwarded to MockWeaveAgentTree.
    """
    # Initialize mock tree and RAG system
    mock_tree = MockWeaveAgentTree(model_name)
    rag = ModernBertRag(mock_tree, db_path="blocks.db")
    await rag.setup()

    # Load example blocks
    example_dir = "./bootstraps/example_blocks"
    example_files = [f for f in os.listdir(example_dir) if f.endswith(".json")]

    for filename in example_files:
        json_path = os.path.join(example_dir, filename)
        # filename[:-5] strips the ".json" suffix to locate the paired .py file
        code_path = os.path.join(example_dir, filename[:-5] + ".py")

        with open(json_path) as f:
            metadata = json.load(f)
        with open(code_path) as f:
            render_content = f.read()

        # Create unique ID from render content
        sha = hashlib.sha256()
        sha.update(render_content.encode('utf-8'))
        block_id = sha.hexdigest()

        # Check if block already exists
        # NOTE(review): a fresh connection is opened and closed for every
        # file; presumably cheap for the small example set — confirm if the
        # corpus grows.
        conn = await rag._connect()
        cursor = await conn.cursor()
        await cursor.execute("SELECT 1 FROM blocks WHERE block_id=?", (block_id,))
        exists = await cursor.fetchone() is not None
        await cursor.close()
        await conn.close()

        if exists:
            print(f"Block {block_id[:8]}... already exists, skipping")
            continue

        # Create the item structure ModernBERT-RAG expects
        rag_item = {
            "id": block_id,
            "render": render_content,
            "context": metadata.get("context", ""),
            "type": metadata.get("type", "code_block"),
            "q": metadata.get("q", ""),
            "score": metadata.get("score", 0.0),
            "_index": metadata.get("index", 0),
            "timestamp": metadata.get("timestamp", 0.0)
        }

        # Add to processing queue and process immediately
        rag.add(rag_item)
        # process_item() presumably returns the stored block id on success
        # and a falsy value on failure — confirm against ModernBertRag.
        processed_id = await rag.process_item()

        if processed_id:
            print(f"Successfully added memory block {processed_id[:8]}...")
        else:
            print(f"Failed to process block {block_id[:8]}...")
74 |
if __name__ == "__main__":
    # CLI entry point: `python sleep.py <model_name>`
    arg_parser = ArgumentParser()
    arg_parser.add_argument("model_name")
    cli_args = arg_parser.parse_args()
    asyncio.run(bootstrap_rag_memories(cli_args.model_name))
    print("Bootstrap memories added!")
81 |
--------------------------------------------------------------------------------
/agent/templates/describe1.txt:
--------------------------------------------------------------------------------
1 | User
2 |
3 | Summarize the features of the following action taken by a Weave-Agent ReAct LLM agent in English. Your summary will be used for retrieval, so write out all names of relevant objects, variables, functions etc referenced. Write using precise phrasing, proper names, and jargon so that related blocks can be queried by shared traits and features.
4 |
5 | ```
6 | {rendered_block}
7 | ```
8 |
9 | - Do not use markdown formatting and avoid quote marking names and variables.
10 |
11 | - Put your summary in a SGML/XML type start tag and end tag.
12 |
13 | - Write about a paragraph or two of English prose describing the block using precise phrasing and proper names as instructed above.
14 |
15 | Assistant
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/agent/templates/describe2.txt:
--------------------------------------------------------------------------------
1 | User
2 |
3 | Summarize the features of the following action taken by a Weave-Agent ReAct LLM agent in English. Your summary will be used for retrieval, so write out all names of relevant objects, variables, functions etc referenced. Write using precise phrasing, proper names, and jargon so that related blocks can be queried by shared traits and features.
4 |
5 | ```
6 | {rendered_block}
7 | ```
8 |
9 | - Do not use markdown formatting and avoid quote marking names and variables.
10 |
11 | - Put your summary in a SGML/XML type start tag and end tag.
12 |
13 | - Write about a paragraph or two of English prose describing the block using precise phrasing and proper names as instructed above.
14 |
15 | Assistant
16 |
17 |
18 | {object_description}
19 |
20 |
21 | User
22 |
23 | Alright here is the context that block appears in. Write another summary focusing on the block in this context. This is a continuation of the previous summary so avoid redundant statements you already made in the first one. The purpose of this continuation is so that it is possible to retrieve the block in a similar situation so again use proper names, precise phrasing, jargon, etc in relation to the context that would cause this block to be retrieved if a similar situation arose.
24 |
25 | ```
26 | {rendered_context}
27 | ```
28 |
29 | Assistant
30 |
31 |
32 |
--------------------------------------------------------------------------------
/agent/test_block_extractors.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from block_generators import extract_function_and_add_action_or_evaluation
3 |
class TestExtractFunctionAndAddAction(unittest.TestCase):
    """Tests for extract_function_and_add_action_or_evaluation.

    Every scenario below expects the same extraction result — the
    example_function definition and the add_action call — regardless of any
    unrelated code surrounding them, so the shared assertions live in the
    _check_extraction helper instead of being repeated four times.
    """

    # What extraction should recover in every scenario, after strip().
    EXPECTED_FUNCTION = "def example_function(agent):\n    pass"
    EXPECTED_ADD_ACTION = "agent.add_action('Example Action', example_function)"

    def _check_extraction(self, code):
        """Run the extractor on `code` and assert both extracted parts match."""
        function_code, add_action_code = extract_function_and_add_action_or_evaluation(
            code, "add_action")
        self.assertEqual(function_code.strip(), self.EXPECTED_FUNCTION)
        self.assertEqual(add_action_code.strip(), self.EXPECTED_ADD_ACTION)

    def test_normal_action_block(self):
        """Function followed directly by the add_action call."""
        self._check_extraction("""
def example_function(agent):
    pass

agent.add_action('Example Action', example_function)
""")

    def test_action_block_with_unrelated_code_after(self):
        """Unrelated definitions and statements after add_action are ignored."""
        self._check_extraction("""
def example_function(agent):
    pass

agent.add_action('Example Action', example_function)

# Unrelated code
def unrelated_function():
    pass

print("Unrelated statement")
""")

    def test_action_block_with_unrelated_expression_between(self):
        """An unrelated expression between the function and add_action is ignored."""
        self._check_extraction("""
def example_function(agent):
    pass

# Unrelated expression
print("Unrelated expression")

agent.add_action('Example Action', example_function)
""")

    def test_action_block_with_unrelated_statement_between(self):
        """An unrelated assignment between the function and add_action is ignored."""
        self._check_extraction("""
def example_function(agent):
    pass

# Unrelated statement
x = 42

agent.add_action('Example Action', example_function)
""")
81 |
# Allow running this module directly: python test_block_extractors.py
if __name__ == '__main__':
    unittest.main()
84 |
--------------------------------------------------------------------------------
/agent/test_rl_reward_penalties.py:
--------------------------------------------------------------------------------
1 | # Unit tests
2 | import unittest
3 | from prepare_rl_set_from_traces import process_trace_rewards
4 |
class TestRewardProcessing(unittest.TestCase):
    """Tests for process_trace_rewards reward propagation and error penalties."""

    def setUp(self):
        # A clean tick with no errors anywhere in the chain.
        self.sample_trace1 = [
            {"type": "orientation", "score": 2.5, "index": 0},
            {"type": "action", "score": 1.8, "index": 1},
            {"type": "expectation", "score": 3.0, "index": 2},
        ]

        # One failed action (AssertionError) recovered by a debug + retry.
        self.sample_trace2 = [
            {"type": "orientation", "score": 3.5, "index": 0},
            {"type": "action", "score": 2.0, "index": 1,
             "outcome": {"error": "AssertionError"}},
            {"type": "error", "index": 2},
            {"type": "debug", "score": 2.5, "index": 3},
            {"type": "action", "score": 2.2, "index": 4},
            {"type": "expectation", "score": 3.0, "index": 5},
        ]

        # Three consecutive failed actions, each followed by a debug block.
        self.sample_trace3 = [
            {"type": "orientation", "score": 2.0, "index": 0},
            {"type": "action", "score": 1.8, "index": 1,
             "outcome": {"error": "TypeError"}},
            {"type": "error", "index": 2},
            {"type": "debug", "score": 2.0, "index": 3},
            {"type": "action", "score": 1.9, "index": 4,
             "outcome": {"error": "KeyError"}},
            {"type": "error", "index": 5},
            {"type": "debug", "score": 1.8, "index": 6},
            {"type": "action", "score": 2.1, "index": 7,
             "outcome": {"error": "AssertionError"}},
            {"type": "error", "index": 8},
            {"type": "debug", "score": 2.2, "index": 9},
            {"type": "orientation", "score": 2.5, "index": 10},
        ]

    def _assert_scores(self, processed, expected):
        """Check each (position, score) pair against the processed trace."""
        for position, score in expected:
            self.assertAlmostEqual(processed[position]["score"], score)

    def test_successful_action_chain(self):
        processed = process_trace_rewards(self.sample_trace1)
        # Orientation gains +0.1 (2.5 -> 2.6); the clean action is untouched.
        self._assert_scores(processed, [(0, 2.6), (1, 1.8)])

    def test_mixed_success_chain(self):
        processed = process_trace_rewards(self.sample_trace2)
        self._assert_scores(processed, [(0, 3.25), (1, 1.75), (3, 2.6), (4, 2.2)])

    def test_triple_failure_chain(self):
        processed = process_trace_rewards(self.sample_trace3)
        self._assert_scores(
            processed,
            [(0, 1.9), (3, 1.9), (6, 1.7), (1, 1.3), (4, 1.4), (7, 1.85)])
62 |
# Allow running this module directly: python test_rl_reward_penalties.py
if __name__ == "__main__":
    unittest.main()
65 |
--------------------------------------------------------------------------------
/agent/test_weave_kanban.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from typing import List, Optional, Dict, Any
3 | import json
4 | import types
5 |
6 | # Assuming the classes WeaveKanbanTask and WeaveKanban are defined in a module named kanban
7 | from weave_agent import WeaveAgent, WeaveKanbanTask, WeaveKanban
8 |
# NOTE(review): a module-level `global` statement is a no-op, and `agent` is
# never defined or referenced at module scope in this file — presumably a
# leftover from an earlier version; confirm before removing.
global agent
10 |
class TestWeaveKanbanTask(unittest.TestCase):
    """Behavioral tests for WeaveKanbanTask: completion, evaluations,
    blocking, and constructor argument coercion/validation."""

    def setUp(self):
        self.agent = WeaveAgent("placeholder")
        self.kanban = WeaveKanban(self.agent)

    def _make_task(self, task_id=1, title="Test Task", **kwargs):
        """Factory for tasks bound to this test's kanban board."""
        return WeaveKanbanTask(self.kanban, task_id, title, **kwargs)

    def test_create_and_complete_task(self):
        task = self._make_task()
        task.completed("Task completed")
        self.assertEqual('completed', task.status)

    def test_create_task_with_evaluations_and_complete(self):
        def passing_evaluation(agent):
            return True

        task = self._make_task()
        task.add_evaluation("Test Evaluation", passing_evaluation)
        task.completed("Task completed")
        self.assertEqual('completed', task.status)

    def test_create_task_with_failing_evaluations(self):
        def failing_evaluation(agent):
            raise ValueError("Test Error")

        task = self._make_task()
        task.add_evaluation("Test Evaluation", failing_evaluation)
        with self.assertRaises(ValueError):
            task.completed("Task completed")

    def test_create_task_with_blockers_as_strings(self):
        # blocked_on entries must be integer task ids, not strings.
        with self.assertRaises(ValueError):
            self._make_task(status="blocked", blocked_on=["1"])

    def test_unblock_task_when_blockers_completed(self):
        blocker = self._make_task(1, "Blocker Task")
        self.kanban.tasks.append(blocker)
        blocked = self._make_task(2, "Blocked Task", status="blocked", blocked_on=[1])
        self.kanban.tasks.append(blocked)
        blocker.completed("Blocker task completed")
        self.kanban.unblock()
        self.assertEqual('idle', blocked.status)

    def test_str_id_becomes_int(self):
        # A numeric string id is coerced to int.
        coerced_id_task = self._make_task("1")
        self.assertEqual(1, coerced_id_task.id)
        # A non-string title is coerced to str.
        coerced_title_task = self._make_task(1, 123)
        self.assertEqual("123", coerced_title_task.title)

    def test_create_task_with_wrong_arguments(self):
        # A lambda cannot be converted to an integer id.
        with self.assertRaises(TypeError):
            self._make_task(lambda x: x + 1, status="invalid_status")
        class NoString:
            def __str__(self):
                pass
        # A title whose __str__ returns None is an invalid title.
        with self.assertRaises(TypeError):
            self._make_task(1, NoString())
        # Unknown status values are rejected.
        with self.assertRaises(ValueError):
            self._make_task(status="invalid_status")
70 |
class TestWeaveKanban(unittest.TestCase):
    """Board-level tests for WeaveKanban: task management, unblocking,
    rendering, and JSON round-tripping."""

    def setUp(self):
        self.agent = WeaveAgent("placeholder")
        self.kanban = WeaveKanban(self.agent)

    def test_add_task(self):
        self.kanban.add_task("Test Task")
        self.assertEqual(1, len(self.kanban.tasks))
        self.assertEqual("Test Task", self.kanban.tasks[0].title)

    def test_get_task(self):
        self.kanban.add_task("Test Task")
        fetched = self.kanban.get_task(1)
        self.assertIsNotNone(fetched)
        self.assertEqual("Test Task", fetched.title)

    def test_view_board(self):
        titles = ("Test Task 1", "Test Task 2")
        for title in titles:
            self.kanban.add_task(title)
        rendered = self.kanban.view_board()
        for title in titles:
            self.assertIn(title, rendered)

    def test_unblock(self):
        self.kanban.add_task("Blocker Task")
        self.kanban.add_task("Blocked Task", status="blocked", blocked_on=[1])
        blocker = self.kanban.get_task(1)
        blocked = self.kanban.get_task(2)
        blocker.completed("Blocker task completed")
        self.kanban.unblock()
        self.assertEqual('idle', blocked.status)

    def test_to_json(self):
        self.kanban.add_task("Test Task")
        self.assertIn("Test Task", self.kanban.to_json())

    def test_from_json(self):
        self.kanban.add_task("Test Task")
        serialized = self.kanban.to_json()
        restored = WeaveKanban(self.agent)
        restored.from_json(serialized)
        self.assertEqual(1, len(restored.tasks))
        self.assertEqual("Test Task", restored.tasks[0].title)
116 |
# Allow running this module directly: python test_weave_kanban.py
if __name__ == '__main__':
    unittest.main()
119 |
--------------------------------------------------------------------------------
/agent/tools/atspi_desktop.py:
--------------------------------------------------------------------------------
1 | import pyatspi
2 | import pyautogui
3 | import time
4 | import math
5 |
class ATSPIDesktopTurtle:
    """Assistive Technology Service Provider Interface and PyAutoGUI based Linux desktop
    interaction tool for weave-agent with Logo Turtle like movement semantics."""
    def __init__(self):
        # NOTE(review): pyatspi usually exposes Registry as a ready-made
        # module-level object; confirm that calling pyatspi.Registry() is
        # valid for the installed pyatspi version.
        self.registry = pyatspi.Registry()
        self.desktop = self.registry.getDesktop(0)
        self.heading = 0 # Initial heading in degrees (0 degrees is to the right)

    def forward(self, distance=10):
        """Move the pointer `distance` pixels along the current heading.

        Screen y grows downward, so a heading of 90 degrees moves the
        pointer down the screen.
        """
        radians = math.radians(self.heading)
        dx = distance * math.cos(radians)
        dy = distance * math.sin(radians)
        pyautogui.moveRel(dx, dy)

    def backward(self, distance=10):
        """Move the pointer `distance` pixels opposite the current heading."""
        radians = math.radians(self.heading)
        dx = -distance * math.cos(radians)
        dy = -distance * math.sin(radians)
        pyautogui.moveRel(dx, dy)

    def right(self, distance=10):
        """Move the pointer `distance` pixels perpendicular-right of the heading.

        NOTE(review): in Logo, right() rotates the turtle's heading; here it
        translates the pointer sideways and leaves self.heading unchanged —
        confirm this divergence from turtle semantics is intentional.
        """
        radians = math.radians(self.heading + 90)
        dx = distance * math.cos(radians)
        dy = distance * math.sin(radians)
        pyautogui.moveRel(dx, dy)

    def left(self, distance=10):
        """Move the pointer `distance` pixels perpendicular-left of the heading.

        NOTE(review): like right(), this translates rather than turns — confirm.
        """
        radians = math.radians(self.heading - 90)
        dx = distance * math.cos(radians)
        dy = distance * math.sin(radians)
        pyautogui.moveRel(dx, dy)

    def goto(self, x, y):
        """Move the pointer to absolute screen coordinates (x, y)."""
        pyautogui.moveTo(x, y)

    def setx(self, x):
        """Move the pointer horizontally to column x, keeping its current y."""
        current_x, current_y = pyautogui.position()
        pyautogui.moveTo(x, current_y)

    def sety(self, y):
        """Move the pointer vertically to row y, keeping its current x."""
        current_x, current_y = pyautogui.position()
        pyautogui.moveTo(current_x, y)

    def setheading(self, angle):
        """Set the heading (in degrees) used by forward/backward/right/left."""
        self.heading = angle

    def home(self):
        """Move the pointer to the top-left corner of the screen (0, 0)."""
        pyautogui.moveTo(0, 0)

    def speed(self, speed):
        """Set PyAutoGUI's global pause between calls to 1/speed seconds.

        NOTE(review): speed=0 raises ZeroDivisionError; callers must pass a
        positive value.
        """
        pyautogui.PAUSE = 1 / speed

    def input_string(self, text):
        """Type `text` at the current keyboard focus."""
        pyautogui.typewrite(text)

    def input_key_combination(self, keys):
        """Press a key chord given as an iterable of key names, e.g. ("ctrl", "s")."""
        pyautogui.hotkey(*keys)

    def get_screen_elements(self):
        """Return the direct children of every application on the desktop."""
        elements = []
        for app in self.desktop:
            for child in app:
                elements.append(child)
        return elements

    def get_keyboard_focusable_elements(self):
        """Return top-level children whose AT-SPI state set includes FOCUSABLE."""
        focusable_elements = []
        for app in self.desktop:
            for child in app:
                if child.get_state().contains(pyatspi.STATE_FOCUSABLE):
                    focusable_elements.append(child)
        return focusable_elements

    def get_current_object_under_cursor(self):
        """Return the accessible object under the pointer position."""
        x, y = pyautogui.position()
        obj = self.registry.getAccessibleAtPoint(x, y, pyatspi.DESKTOP_COORDS)
        return obj

    def get_current_object_with_keyboard_focus(self):
        """Return the accessible object that currently holds keyboard focus."""
        focus = self.registry.getFocus()
        return focus

    def scan(self):
        """Return one dict per top-level element with its screen extents.

        Each entry has keys: element, x, y, width, height (desktop coords).
        """
        elements_info = []
        for app in self.desktop:
            for child in app:
                extents = child.queryComponent().getExtents(pyatspi.DESKTOP_COORDS)
                x, y, width, height = extents.x, extents.y, extents.width, extents.height
                elements_info.append({
                    'element': child,
                    'x': x,
                    'y': y,
                    'width': width,
                    'height': height
                })
        return elements_info

    def get_element_text(self, element):
        """Collect all text content under `element`, depth-first.

        Returns the text of the element and every descendant exposing the
        AT-SPI Text interface, joined with newlines.
        """
        text_content = []

        def traverse(elem):
            if elem is not None:
                try:
                    # queryText raises NotImplementedError for elements that
                    # do not expose the Text interface; those are skipped.
                    text_interface = elem.queryText()
                    if text_interface:
                        text_content.append(text_interface.getText(0, -1))
                except NotImplementedError:
                    pass

                for child in elem:
                    traverse(child)

        traverse(element)
        return "\n".join(text_content)
120 |
# Example usage
if __name__ == "__main__":
    turtle = ATSPIDesktopTurtle()

    # Scan the screen and print elements with their coordinates
    elements_info = turtle.scan()
    for info in elements_info:
        print(f"Element: {info['element']}, Coordinates: ({info['x']}, {info['y']}), Size: ({info['width']}, {info['height']})")

        # Get the text content of the element.
        # BUG FIX: this lookup previously sat outside the loop, so it only
        # inspected the final element via the leaked loop variable even
        # though the comment promised per-element text content.
        text_content = turtle.get_element_text(info['element'])
        if text_content:
            print(f"Text Content: {text_content}")
        else:
            print("No text content available.")
136 |
--------------------------------------------------------------------------------
/agent/tools/emacs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import libtmux
3 | import time
4 | from datetime import datetime
5 |
class WeaveEmacs:
    """A wrapper for Emacs using libtmux to control the editor."""
    def __init__(self, agent):
        """Bind tool to weave-agent and set up Emacs.

        Spawns a dedicated 80x24 tmux session/window/pane and launches
        `emacs -nw` inside it; all later interaction goes through that pane.
        """
        self.agent = agent
        self.agent.tools["emacs"] = self
        # NOTE(review): this dict is only consumed by close() below; it is
        # never passed to add_observation_view, which instead receives the
        # title and callback directly. Confirm remove_observation_view
        # accepts this shape, otherwise close() will fail to deregister.
        self.observation_view = {
            "type": "observation",
            "title": "WeaveEmacs",
            "callback": self.render
        }
        self.agent.add_observation_view("WeaveEmacs", self.render, tool="emacs")

        # Initialize the tmux session and pane
        self.server = libtmux.Server()
        self.session = self.server.new_session(session_name="emacs_session", kill_session=True)
        self.window = self.session.new_window(window_name="emacs_window", attach=True)
        self.window.resize(height=24, width=80)
        self.pane = self.window.split_window(attach=True, size="100%")

        # Start Emacs in the tmux pane
        self.pane.send_keys('emacs -nw\n')

    def render(self, agent):
        """Render the current Emacs buffer state.

        Returns a string with the last 20 captured pane lines plus a key
        binding cheat sheet, or an error/empty-buffer message string.
        """
        rendered_text = "'''Emacs Editor State:\n"
        try:
            # capture_pane(start=0, end="-") presumably returns the pane's
            # visible lines as a list of strings — confirm against libtmux.
            pane_content = self.pane.capture_pane(start=0, end="-")
        except Exception as e:
            return f"Error capturing Emacs buffer: {str(e)}"

        if not pane_content:
            return "Emacs buffer is empty. Start editing to see content."

        # Show last 20 lines of the buffer
        buffer_excerpt = "\n".join(pane_content[-20:])
        rendered_text += f"Current Buffer Excerpt:\n{buffer_excerpt}\n\n"

        command_cheat_sheet = (
            "Emacs Key Bindings Cheat Sheet:\n"
            " C-x C-f : Open/create file\n"
            " C-x C-s : Save current buffer\n"
            " C-x C-w : Save buffer as...\n"
            " C-x C-c : Exit Emacs\n"
            " C-g : Cancel current command\n"
            " C-s : Search forward\n"
            " C-r : Search backward\n"
            " C-a : Beginning of line\n"
            " C-e : End of line\n"
            " C-n : Next line\n"
            " C-p : Previous line\n"
            " C-v : Scroll down\n"
            " M-v : Scroll up\n"
            " M-x : Execute extended command\n"
            " C-k : Kill line\n"
            " C-y : Yank (paste)\n"
            " C-space : Set mark\n"
            " C-w : Kill region\n"
            " M-w : Copy region\n"
            " C-_ : Undo\n"
            " C-x u : Undo\n"
            " C-x d : Open directory\n"
            " C-x b : Switch buffer\n"
        )
        rendered_text += command_cheat_sheet
        rendered_text += "\n'''"
        return rendered_text

    def send_keys(self, command):
        """Send a keyboard command to Emacs.

        enter=False keeps tmux from appending a newline; the short sleep
        gives Emacs time to update the buffer before the next capture.
        """
        self.pane.send_keys(command, enter=False)
        time.sleep(0.2) # Allow time for buffer updates

    def send_command(self, command):
        """Alias to send commands to Emacs."""
        self.send_keys(command)

    def close(self):
        """Close the Emacs session and deregister the tool from the agent."""
        self.session.kill_session()
        del self.agent.tools["emacs"]
        self.agent.remove_observation_view(self.observation_view)
88 |
# Example usage
if __name__ == "__main__":
    class DummyAgent:
        """Minimal stand-in for the weave-agent interface WeaveEmacs expects."""

        def __init__(self):
            self.tools = {}
            self.observation_views = []

        # Bug fix: WeaveEmacs.__init__ passes tool="emacs"; the original
        # signature had no such parameter and raised TypeError.
        def add_observation_view(self, title, callback, tool=None):
            self.observation_views.append((title, callback))

        def remove_observation_view(self, view):
            self.observation_views.remove(view)

    agent = DummyAgent()
    emacs = WeaveEmacs(agent)

    # Simulate basic Emacs usage
    time.sleep(2)  # Wait for Emacs to start
    emacs.send_command('C-x C-f test.txt')
    emacs.send_command('Hello World')
    emacs.send_command('C-x C-s')
    emacs.send_command('C-x C-c')

    try:
        # Idle until interrupted; sleep instead of the original busy-wait
        # `pass` loop so the demo does not pin a CPU core.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        emacs.close()
117 |
--------------------------------------------------------------------------------
/agent/tools/nano.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | import libtmux
3 | import time
4 | import os
5 |
class WeaveNano:
    """A wrapper for Nano using libtmux to control the editor."""

    def __init__(self, agent, filepath):
        """Open `filepath` in Nano inside a dedicated tmux pane and register the tool."""
        self.agent = agent
        self.filepath = os.path.abspath(filepath)
        # Limit to one instance per file
        # This can be changed later if it wants multiple views of the same file(?)
        if f"nano-{self.filepath}" in self.agent.tools:
            editor = self.agent.tools[f"nano-{self.filepath}"]
            editor.close()
        self.agent.tools[f"nano-{self.filepath}"] = self
        # Let agent grab the last instantiated nano instance with this shortcut
        self.agent.tools["nano"] = self
        self.observation_view = {
            "type": "observation",
            "title": "WeaveNano",
            "callback": self.render
        }
        # Prevent empty WeaveNano object when observation views are at capacity
        try:
            self.agent.add_observation_view("WeaveNano", self.render, tool="nano")
        except ValueError as e:
            del self.agent.tools[f"nano-{self.filepath}"]
            del self.agent.tools["nano"]
            raise ValueError("Couldn't open editor because there are too many "
                             + "observation views. Try removing some.") from e

        # Tmux setup: one session per file, one 80x24 pane running Nano.
        self.server = libtmux.Server()
        session_name = f"nano_{self.filepath}".replace(".", "")
        self.session = self.server.new_session(session_name=session_name, kill_session=True)
        self.window = self.session.new_window(window_name="nano_window", attach=True)
        self.window.resize(height=24, width=80)
        self.pane = self.window.split_window(attach=True, size="100%")

        # Start Nano
        self.pane.send_keys(f'nano {filepath}\n')
        time.sleep(1)  # Wait for Nano to start

    def render(self, agent):
        """Render the Nano editor display plus a key-binding cheat sheet."""
        try:
            content = ""
            for i, line in enumerate(self.pane.capture_pane(start=0, end="-")):
                content += line + "\n"
            content = content[:-1]
            return f"""'''Nano Editor State:
{content}
-----------------
Nano Cheat Sheet:
C-o - Save
C-x - Exit
C-k - Cut line
C-u - Paste
C-w - Search
C-\\ - Replace
C-g - Help
'''"""
        except Exception as e:
            return f"Error getting Nano state: {str(e)}"

    def send_keys(self, command):
        """Send commands to Nano with proper timing."""
        # Special handling for control sequences
        self.pane.send_keys(command, enter=False)
        time.sleep(0.2)  # Nano needs time to process

    def send_command(self, command):
        """Alias of send_keys."""
        self.send_keys(command)

    def send_commands(self, commands):
        """Send a sequence of keystroke commands in order."""
        for command in commands:
            self.send_keys(command)

    def get_screen_text(self):
        """Return the rendered editor state.

        Bug fix: the original called the bare name `render(self.agent)`,
        which raised NameError; the call must go through `self`.
        """
        return self.render(self.agent)

    def close(self):
        """Kill the tmux session and deregister this tool from the agent."""
        self.session.kill_session()
        # Bug fix: only drop the shared "nano" shortcut when it still points
        # at this instance; a newer editor may have claimed it since.
        if self.agent.tools.get("nano") is self:
            del self.agent.tools["nano"]
        del self.agent.tools[f"nano-{self.filepath}"]
        self.agent.remove_observation_view(self.observation_view)
97 |
--------------------------------------------------------------------------------
/agent/trainer/linear_4bit_sharded.py:
--------------------------------------------------------------------------------
1 | """4-bit quantization and FSDP-style parameter sharding for nn.Linear."""
2 |
3 | import bitsandbytes as bnb
4 | import torch
5 | from torch import distributed as dist, nn
6 | from torch.nn import functional as F
7 |
8 |
class Linear4bitSharded(nn.Linear):
    """nn.Linear whose 4-bit-quantized weight is sharded evenly across ranks.

    The source layer's weight is quantized with bitsandbytes, flattened, and
    split into `world_size` equal chunks; each rank stores only its own chunk
    as a buffer. `forward` all-gathers the chunks, dequantizes, and applies a
    standard F.linear. The bias (if any) is kept unquantized and replicated
    on every rank.
    """

    def __init__(
        self,
        layer,
        device,
        group=None,
    ):
        if not isinstance(layer, nn.Linear):
            raise ValueError("layer must be an instance of nn.Linear")
        # Construct the parent on the meta device so no real weight storage is
        # allocated; the actual (quantized, sharded) weight is registered below.
        with torch.device("meta"):
            super().__init__(layer.in_features, layer.out_features, bias=layer.bias is not None)
        self.in_features = layer.in_features
        self.out_features = layer.out_features
        self.bias = nn.Parameter(layer.bias.to(device)) if layer.bias is not None else None
        # Drop the meta-device weight Parameter; replaced by the buffer below.
        del self.weight
        self.device = device
        self.group = group
        rank = dist.get_rank(group)
        world_size = dist.get_world_size(group)
        # Quantize the full weight on this device, then keep only our shard.
        weight_q, state = bnb.functional.quantize_4bit(layer.weight.to(device))
        self.state = state  # quantization state required by dequantize_4bit
        self.weight_shape = weight_q.shape
        weight_q = weight_q.flatten()
        # Even sharding requires the flattened quantized weight to divide evenly.
        assert weight_q.shape[0] % world_size == 0
        n_per_shard = weight_q.shape[0] // world_size
        weight_q = weight_q[rank * n_per_shard : (rank + 1) * n_per_shard].clone()
        self.register_buffer("weight", weight_q)

    def forward(self, x):
        # Reassemble the full quantized weight from all ranks, dequantize,
        # and apply a standard linear transform.
        world_size = dist.get_world_size(self.group)
        weight_list = [torch.empty_like(self.weight) for _ in range(world_size)]
        dist.all_gather(weight_list, self.weight, group=self.group)
        weight_q = torch.cat(weight_list).view(self.weight_shape)
        weight = bnb.functional.dequantize_4bit(weight_q, self.state)
        return F.linear(x, weight, self.bias)
44 |
45 |
def quantize_and_shard(module, device, group=None):
    """Recursively replace every nn.Linear in `module` with Linear4bitSharded.

    Args:
        module: root module to convert (modified in place and returned).
        device: CUDA device the converted model should live on.
        group: process group the weight shards are distributed over.

    Returns:
        The converted module, moved to `device`.
    """
    def _convert(m):
        # Swap Linear layers for sharded 4-bit replacements, depth-first.
        if isinstance(m, nn.Linear):
            return Linear4bitSharded(m, device, group)
        for name, child in m.named_children():
            setattr(m, name, _convert(child))
        return m

    module = _convert(module)
    if isinstance(module, Linear4bitSharded):
        # Already placed on `device` by its constructor.
        return module
    # The original called .cuda(device) once per recursion level; moving the
    # remaining (non-Linear) parameters/buffers once at the top is equivalent
    # and avoids the redundant per-subtree device transfers.
    return module.cuda(device)
52 |
--------------------------------------------------------------------------------
/agent/trainer/linear_4bit_sharded_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import bitsandbytes as bnb
4 | import torch
5 | from torch import distributed as dist, nn
6 | from torch.distributed import nn as dnn
7 | import torch_dist_utils as du
8 |
9 | from linear_4bit_sharded import Linear4bitSharded
10 |
11 |
@torch.no_grad()
def quantize_layer(
    module, compute_dtype=None, blocksize=64, compress_statistics=True, quant_type="fp4"
):
    """Return a bitsandbytes Linear4bit copy of an nn.Linear layer.

    NOTE(review): Params4bit appears to quantize when moved to CUDA —
    confirm against the installed bitsandbytes version.
    """
    if not isinstance(module, torch.nn.Linear):
        raise ValueError("quantize_layer only supports nn.Linear")
    if compute_dtype is None:
        compute_dtype = module.weight.dtype
    quantized = bnb.nn.Linear4bit(
        module.in_features,
        module.out_features,
        bias=module.bias is not None,
        compute_dtype=compute_dtype,
        compress_statistics=compress_statistics,
        quant_type=quant_type,
    )
    quantized.weight = bnb.nn.Params4bit(
        module.weight,
        requires_grad=False,
        blocksize=blocksize,
        compress_statistics=compress_statistics,
        quant_type=quant_type,
    )
    if module.bias is not None:
        quantized.bias = torch.nn.Parameter(module.bias, requires_grad=module.bias.requires_grad)
    if module.weight.device.type == "cuda":
        quantized.cuda(module.weight.device)
    return quantized
39 |
40 |
def main():
    """Compare sharded 4-bit linear output against a plain bnb Linear4bit."""
    du.init_distributed()
    device = du.get_device()
    rank = dist.get_rank()

    # Same initial weights on every rank.
    layer = nn.Linear(10, 20)
    du.broadcast_tensors(layer.parameters())
    reference = quantize_layer(layer).to(device)
    sharded = Linear4bitSharded(layer, device)

    x = torch.randn(4, 10, device=device)
    y_ref = reference(x)
    y = sharded(x)
    error = torch.sqrt(torch.mean((y - y_ref) ** 2))
    with du.do_in_order():
        print(f"Rank {rank}: error = {error}")


if __name__ == "__main__":
    main()
61 |
--------------------------------------------------------------------------------
/agent/trainer/mistral3_ring_attn_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import torch
4 | from torch import distributed as dist
5 | from torch.distributed import nn as dnn
6 | import torch_dist_utils as du
7 | from transformers import AutoModelForImageTextToText, AutoTokenizer, BitsAndBytesConfig
8 |
9 | from patch_model import patch_model
10 |
11 |
def kl_divergence(logits_p, logits_q):
    """Per-position KL divergence KL(p || q), reduced over the last dimension."""
    log_p = torch.log_softmax(logits_p, dim=-1)
    log_q = torch.log_softmax(logits_q, dim=-1)
    p = torch.exp(log_p)
    return (p * (log_p - log_q)).sum(dim=-1)
16 |
17 |
def main():
    """Check ring attention against a full-sequence reference for Mistral 3."""
    du.init_distributed()
    device = du.get_device()
    rank = dist.get_rank()
    world_size = dist.get_world_size()

    model_name = "mistralai/Mistral-Small-3.1-24B-Base-2503"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    prompt = "The quick brown fox jumps over the lazy dog, " * 8
    tokens = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"][:, :64]
    n_tokens = tokens.shape[1]
    assert n_tokens % world_size == 0
    position_ids = torch.arange(n_tokens, device=device)[None]
    n_tokens_device = n_tokens // world_size
    du.print0("Number of tokens:", n_tokens)
    du.print0("Number of tokens per device:", n_tokens_device)

    bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

    def load_model():
        # Fresh frozen eval-mode copy of the 4-bit quantized model.
        m = AutoModelForImageTextToText.from_pretrained(
            model_name,
            device_map={"": device},
            torch_dtype=torch.bfloat16,
            quantization_config=bnb_config,
        )
        return m.eval().requires_grad_(False)

    # Reference: full-sequence logits without ring attention.
    model = load_model()
    logits_ref = model(tokens, position_ids=position_ids, use_cache=False).logits

    del model
    torch.cuda.empty_cache()
    patch_model()

    # Ring attention: each rank runs only its slice of the sequence.
    model = load_model()
    lo = rank * n_tokens_device
    hi = (rank + 1) * n_tokens_device
    logits = model(tokens[:, lo:hi], position_ids=position_ids[:, lo:hi], use_cache=False).logits
    logits_all = torch.cat(dnn.all_gather(logits), dim=1)

    error = kl_divergence(logits_ref, logits_all).mean()
    with du.do_in_order():
        print(f"Rank {rank}: error = {error}")


if __name__ == "__main__":
    main()
75 |
--------------------------------------------------------------------------------
/agent/trainer/mixtral_ring_attn_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import torch
4 | from torch import distributed as dist
5 | from torch.distributed import nn as dnn
6 | import torch_dist_utils as du
7 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
8 |
9 | from patch_model import patch_model
10 |
11 |
def kl_divergence(logits_p, logits_q):
    """Per-position KL divergence KL(p || q), reduced over the last dimension."""
    log_p = torch.log_softmax(logits_p, dim=-1)
    log_q = torch.log_softmax(logits_q, dim=-1)
    p = torch.exp(log_p)
    return (p * (log_p - log_q)).sum(dim=-1)
16 |
17 |
def main():
    """Check ring attention against a full-sequence reference for Mixtral."""
    du.init_distributed()
    device = du.get_device()
    rank = dist.get_rank()
    world_size = dist.get_world_size()

    model_name = "mistralai/Mixtral-8x7B-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    prompt = "The quick brown fox jumps over the lazy dog, " * 8
    tokens = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"][:, :64]
    n_tokens = tokens.shape[1]
    assert n_tokens % world_size == 0
    position_ids = torch.arange(n_tokens, device=device)[None]
    n_tokens_device = n_tokens // world_size
    du.print0("Number of tokens:", n_tokens)
    du.print0("Number of tokens per device:", n_tokens_device)

    bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

    def load_model():
        # Fresh frozen eval-mode copy of the 4-bit quantized model.
        m = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map={"": device},
            torch_dtype=torch.bfloat16,
            quantization_config=bnb_config,
        )
        return m.eval().requires_grad_(False)

    # Reference: full-sequence logits without ring attention.
    model = load_model()
    logits_ref = model(tokens, position_ids=position_ids, use_cache=False).logits

    del model
    torch.cuda.empty_cache()
    patch_model()

    # Ring attention: each rank runs only its slice of the sequence.
    model = load_model()
    lo = rank * n_tokens_device
    hi = (rank + 1) * n_tokens_device
    logits = model(tokens[:, lo:hi], position_ids=position_ids[:, lo:hi], use_cache=False).logits
    logits_all = torch.cat(dnn.all_gather(logits), dim=1)

    error = kl_divergence(logits_ref, logits_all).mean()
    with du.do_in_order():
        print(f"Rank {rank}: error = {error}")


if __name__ == "__main__":
    main()
75 |
--------------------------------------------------------------------------------
/agent/trainer/patch_model.py:
--------------------------------------------------------------------------------
1 | from transformers import modeling_utils
2 |
3 | from ring_attn import ring_attn
4 |
5 |
class RingAttentionForward:
    """Attention callback that routes transformers' SDPA calls to ring_attn.

    Dims 1 and 2 of the incoming q/k/v are swapped to match the layout
    ring_attn expects (presumably heads-major -> sequence-major; confirm
    against the transformers attention interface).
    """

    def __init__(self, group):
        self.group = group

    def __call__(self, module, query, key, value, *args, **kwargs):
        q, k, v = (t.transpose(1, 2) for t in (query, key, value))
        out = ring_attn(q, k, v, causal=module.is_causal, group=self.group)
        # Second element is the attention-weights slot, which we don't provide.
        return out, None
16 |
17 |
def patch_model(group=None):
    """Globally replace transformers' "sdpa" attention with ring attention.

    Mutates the module-level ALL_ATTENTION_FUNCTIONS table, so it affects
    every model constructed after this call. `group` is the process group the
    ring spans (None = default group).
    """
    modeling_utils.ALL_ATTENTION_FUNCTIONS["sdpa"] = RingAttentionForward(group)
20 |
--------------------------------------------------------------------------------
/agent/trainer/qwen2_ring_attn_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import torch
4 | from torch import distributed as dist
5 | from torch.distributed import nn as dnn
6 | import torch_dist_utils as du
7 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
8 |
9 | from patch_model import patch_model
10 |
11 |
def kl_divergence(logits_p, logits_q):
    """Per-position KL divergence KL(p || q), reduced over the last dimension."""
    log_p = torch.log_softmax(logits_p, dim=-1)
    log_q = torch.log_softmax(logits_q, dim=-1)
    p = torch.exp(log_p)
    return (p * (log_p - log_q)).sum(dim=-1)
16 |
17 |
def main():
    """Check ring attention against a full-sequence reference for Qwen2.5."""
    du.init_distributed()
    device = du.get_device()
    rank = dist.get_rank()
    world_size = dist.get_world_size()

    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    prompt = "The quick brown fox jumps over the lazy dog, " * 8
    tokens = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"][:, :64]
    n_tokens = tokens.shape[1]
    assert n_tokens % world_size == 0
    position_ids = torch.arange(n_tokens, device=device)[None]
    n_tokens_device = n_tokens // world_size
    du.print0("Number of tokens:", n_tokens)
    du.print0("Number of tokens per device:", n_tokens_device)

    bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

    def load_model():
        # Fresh frozen eval-mode copy of the 4-bit quantized model.
        m = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map={"": device},
            torch_dtype=torch.bfloat16,
            quantization_config=bnb_config,
        )
        return m.eval().requires_grad_(False)

    # Reference: full-sequence logits without ring attention.
    model = load_model()
    logits_ref = model(tokens, position_ids=position_ids, use_cache=False).logits

    del model
    torch.cuda.empty_cache()
    patch_model()

    # Ring attention: each rank runs only its slice of the sequence.
    model = load_model()
    lo = rank * n_tokens_device
    hi = (rank + 1) * n_tokens_device
    logits = model(tokens[:, lo:hi], position_ids=position_ids[:, lo:hi], use_cache=False).logits
    logits_all = torch.cat(dnn.all_gather(logits), dim=1)

    error = kl_divergence(logits_ref, logits_all).mean()
    with du.do_in_order():
        print(f"Rank {rank}: error = {error}")


if __name__ == "__main__":
    main()
75 |
--------------------------------------------------------------------------------
/agent/trainer/ring_attn.py:
--------------------------------------------------------------------------------
1 | """Ring attention for PyTorch.
2 |
3 | See https://github.com/nshepperd/flash_attn_jax/blob/main/src/flash_attn_jax/ring_attention.py.
4 | """
5 |
6 | import flash_attn.flash_attn_interface as fai
7 | import torch
8 | from torch import distributed as dist
9 |
10 |
def ppermute(xs, perm, group=None):
    """Point-to-point permute a list of tensors across ranks (cf. JAX ppermute).

    Args:
        xs: list of tensors to send; receive buffers are allocated with
            matching shapes/dtypes, so peers must send identically shaped
            tensors.
        perm: list of (src, dst) rank pairs; every participating rank must
            call with the identical perm.
        group: optional process group (None = default group).

    Returns:
        List of received tensors, positionally aligned with xs.
    """
    rank = dist.get_rank(group)
    ys = [torch.empty_like(x) for x in xs]
    ops = []
    for src, dst in perm:
        for x, y in zip(xs, ys):
            if src == rank:
                ops.append(dist.P2POp(dist.isend, x, dst, group))
            if dst == rank:
                ops.append(dist.P2POp(dist.irecv, y, src, group))
    # Launch all sends/receives as one batch and wait; batching avoids the
    # deadlocks that mismatched blocking send/recv ordering can cause.
    reqs = dist.batch_isend_irecv(ops)
    for req in reqs:
        req.wait()
    return ys
25 |
26 |
def _flash_fwd(q, k, v, causal):
    """Single-block flash-attention forward; returns (out, lse)."""
    window = (-1, 0) if causal else (-1, -1)
    ret = fai._flash_attn_forward(
        q=q,
        k=k,
        v=v,
        dropout_p=0.0,
        softmax_scale=k.shape[-1] ** -0.5,
        causal=causal,
        window_size=window,
        softcap=0.0,
        alibi_slopes=None,
        return_softmax=False,
    )
    out, lse = ret[0], ret[5]
    return out, lse
41 |
42 |
def _flash_bwd(do, q, k, v, o, lse, causal):
    """Single-block flash-attention backward; returns (dq, dk, dv)."""
    window = (-1, 0) if causal else (-1, -1)
    ret = fai._flash_attn_backward(
        dout=do,
        q=q,
        k=k,
        v=v,
        out=o,
        softmax_lse=lse,
        dq=torch.empty_like(q),
        dk=torch.empty_like(k),
        dv=torch.empty_like(v),
        dropout_p=0,
        softmax_scale=k.shape[-1] ** -0.5,
        causal=causal,
        window_size=window,
        softcap=0.0,
        alibi_slopes=None,
        deterministic=False,
        rng_state=None,
    )
    dq, dk, dv = ret[0], ret[1], ret[2]
    return dq, dk, dv
64 |
65 |
def _ring_fwd(q, k, v, causal=False, group=None):
    """Ring-attention forward over sequence-sharded q/k/v.

    Each rank holds one block of the sequence. The k/v blocks rotate around
    the ring once per step; partial outputs are merged with a numerically
    stable log-sum-exp (online-softmax) update. Returns (out, lse) with lse
    shaped (n, h, s) in float32.
    """
    rank = dist.get_rank(group)
    world_size = dist.get_world_size(group)
    # Ring permutation: every rank forwards its k/v block to the next rank.
    perm = [(i, (i + 1) % world_size) for i in range(world_size)]
    n, s, h, d = q.shape
    q_ix = torch.tensor(rank, device=q.device)  # index of the local q block (fixed)
    k_ix = torch.tensor(rank, device=q.device)  # index of current k/v block (rotates)
    o = torch.zeros_like(q, dtype=torch.float32)
    lse = torch.full((n, h, s), float("-inf"), device=q.device, dtype=torch.float32)
    for _ in range(world_size):
        o1, lse1 = o, lse
        if not causal:
            o2, lse2 = _flash_fwd(q, k, v, causal=False)
        else:
            if q_ix < k_ix:
                # q block strictly precedes this k/v block: fully masked.
                o2 = torch.zeros_like(q)
                lse2 = torch.full((n, h, s), float("-inf"), device=q.device, dtype=torch.float32)
            elif q_ix == k_ix:
                # Diagonal block: apply the causal mask within the block.
                o2, lse2 = _flash_fwd(q, k, v, causal=True)
            else:
                # q block comes after this k/v block: attend to all of it.
                o2, lse2 = _flash_fwd(q, k, v, causal=False)
        # Merge the new partial result into the accumulator (online softmax):
        # rescale both contributions by their share of the combined lse.
        lse = torch.logaddexp(lse1, lse2)
        o = o1 * torch.exp(lse1 - lse).mT[..., None] + o2 * torch.exp(lse2 - lse).mT[..., None]
        k, v, k_ix = ppermute([k, v, k_ix], perm, group)
    return o.to(q.dtype), lse
91 |
92 |
def _ring_bwd(do, q, k, v, o, lse, causal=False, group=None):
    """Ring-attention backward; returns (dq, dk, dv) for the local block.

    k/v blocks travel around the ring together with their gradient
    accumulators; each step adds the local q block's contribution to the
    currently held k/v block. Accumulation happens in float32.
    """
    rank = dist.get_rank(group)
    world_size = dist.get_world_size(group)
    perm = [(i, (i + 1) % world_size) for i in range(world_size)]
    ix = torch.tensor(rank, device=q.device)  # index of the local q block
    dq = torch.zeros_like(q, dtype=torch.float32)
    dk = torch.zeros_like(k, dtype=torch.float32)
    dv = torch.zeros_like(v, dtype=torch.float32)
    k2, v2, dk2, dv2, ix2 = k, v, dk, dv, ix
    for _ in range(world_size):
        dk2_, dv2_, k2_, v2_, ix2_ = ppermute([dk2, dv2, k2, v2, ix2], perm, group)
        if not causal:
            dqa, dka, dva = _flash_bwd(do, q, k2, v2, o, lse, causal=False)
            dq += dqa
            dk2_ += dka
            dv2_ += dva
        else:
            if ix == ix2:
                # Diagonal block: causal mask applies inside the block.
                dqa, dka, dva = _flash_bwd(do, q, k2, v2, o, lse, causal=True)
            elif ix > ix2:
                # q block follows this k/v block: unmasked contribution.
                dqa, dka, dva = _flash_bwd(do, q, k2, v2, o, lse, causal=False)
            if ix >= ix2:
                # ix < ix2 is fully masked: nothing to accumulate.
                dq += dqa
                dk2_ += dka
                dv2_ += dva
        k2, v2, dk2, dv2, ix2 = k2_, v2_, dk2_, dv2_, ix2_
    # Bug fix: this final rotation previously omitted `group`, so with a
    # non-default process group the dk/dv accumulators were permuted over the
    # wrong set of ranks. Pass `group` like every other collective here.
    dk2, dv2 = ppermute([dk2, dv2], perm, group)
    return dq.to(q.dtype), dk2.to(k.dtype), dv2.to(v.dtype)
121 |
122 |
class _RingAttention(torch.autograd.Function):
    """Autograd wrapper tying _ring_fwd and _ring_bwd together."""

    @staticmethod
    def setup_context(ctx, inputs, output):
        # Stash everything the backward pass needs (separate from forward()
        # per the new-style autograd.Function protocol).
        q, k, v, causal, group = inputs
        o, lse = output
        ctx.causal = causal
        ctx.group = group
        ctx.save_for_backward(q, k, v, o, lse)

    @staticmethod
    def forward(q, k, v, causal, group):
        return _ring_fwd(q, k, v, causal=causal, group=group)

    @staticmethod
    def backward(ctx, do, _):
        # `_` is the (unused) incoming gradient w.r.t. lse.
        q, k, v, o, lse = ctx.saved_tensors
        dq, dk, dv = _ring_bwd(do, q, k, v, o, lse, causal=ctx.causal, group=ctx.group)
        # No gradients for the non-tensor inputs (causal, group).
        return dq, dk, dv, None, None
141 |
142 |
def ring_attn(q, k, v, causal=False, group=None):
    """Ring attention over sequence-sharded q/k/v; returns only the output
    (the log-sum-exp the autograd function also produces is discarded)."""
    out, _lse = _RingAttention.apply(q, k, v, causal, group)
    return out
146 |
--------------------------------------------------------------------------------
/agent/trainer/ring_attn_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import flash_attn
4 | import torch
5 | from torch import distributed as dist
6 | from torch.distributed import nn as dnn
7 | import torch_dist_utils as du
8 |
9 | from ring_attn import ppermute, ring_attn
10 |
11 |
def main():
    """Smoke-test ppermute and ring_attn against single-device flash attention."""
    du.init_distributed()
    device = du.get_device()
    rank = dist.get_rank()
    world_size = dist.get_world_size()

    # test ppermute
    du.print0("Testing ppermute...")
    x = torch.arange(rank * 4, (rank + 1) * 4, device=device)
    perm = [(i, (i + 1) % world_size) for i in range(world_size)]
    y = ppermute([x], perm)
    with du.do_in_order():
        print(f"Rank {rank}: x = {x}, y = {y}")

    q = torch.randn(4, 10, 8, 64, device=device, dtype=torch.bfloat16)
    k = torch.randn(4, 10, 4, 64, device=device, dtype=torch.bfloat16)
    v = torch.randn(4, 10, 4, 64, device=device, dtype=torch.bfloat16)
    do = torch.randn(4, 10, 8, 64, device=device, dtype=torch.bfloat16)
    q_all = torch.cat(dnn.all_gather(q), dim=1)
    k_all = torch.cat(dnn.all_gather(k), dim=1)
    v_all = torch.cat(dnn.all_gather(v), dim=1)
    do_all = torch.cat(dnn.all_gather(do), dim=1)

    def check(causal):
        # Compare ring attention on local shards against flash attention on
        # the gathered sequence, for outputs and all input gradients.
        # (The causal and non-causal sections were previously duplicated.)
        q_all_ = q_all.clone().requires_grad_()
        k_all_ = k_all.clone().requires_grad_()
        v_all_ = v_all.clone().requires_grad_()
        o_ref = flash_attn.flash_attn_func(q_all_, k_all_, v_all_, causal=causal)
        o_ref.backward(do_all)
        q_ = q.clone().requires_grad_()
        k_ = k.clone().requires_grad_()
        v_ = v.clone().requires_grad_()
        o = ring_attn(q_, k_, v_, causal=causal)
        o.backward(do)
        o_all = torch.cat(dnn.all_gather(o), dim=1)
        dq_all = torch.cat(dnn.all_gather(q_.grad), dim=1)
        dk_all = torch.cat(dnn.all_gather(k_.grad), dim=1)
        dv_all = torch.cat(dnn.all_gather(v_.grad), dim=1)
        error_o = torch.sqrt(torch.mean((o_all - o_ref) ** 2))
        error_dq = torch.sqrt(torch.mean((q_all_.grad - dq_all) ** 2))
        error_dk = torch.sqrt(torch.mean((k_all_.grad - dk_all) ** 2))
        error_dv = torch.sqrt(torch.mean((v_all_.grad - dv_all) ** 2))
        with du.do_in_order():
            print(f"Rank {rank}: error o = {error_o}")
            print(f"Rank {rank}: error dq = {error_dq}")
            print(f"Rank {rank}: error dk = {error_dk}")
            print(f"Rank {rank}: error dv = {error_dv}")

    # non-causal
    du.print0("Testing non-causal ring attention...")
    check(causal=False)

    # causal
    du.print0("Testing causal ring attention...")
    check(causal=True)


if __name__ == "__main__":
    main()
90 |
--------------------------------------------------------------------------------
/agent/trainer/trainer_val_loss.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import json
5 | import os
6 | from pathlib import Path
7 |
8 | import peft
9 | import torch
10 | from torch import distributed as dist
11 | from torch.nn import functional as F
12 | from torch.utils import data
13 | import torch_dist_utils as du
14 | from tqdm import tqdm
15 | from transformers import AutoModelForCausalLM, AutoModelForImageTextToText, AutoTokenizer
16 |
17 | from linear_4bit_sharded import quantize_and_shard
18 | from patch_model import patch_model
19 |
# Route prints through tqdm so log lines don't mangle the progress bar.
print = tqdm.external_write_mode()(print)
print0 = tqdm.external_write_mode()(du.print0)
22 |
23 |
class Dataset(data.Dataset):
    """JSONL dataset: one {"text": ...} object per line, tokenized on access."""

    def __init__(self, path, tokenizer):
        self.tokenizer = tokenizer
        self.dataset = Path(path).read_text().splitlines()

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        record = json.loads(self.dataset[idx])
        return self.tokenizer(record["text"]).input_ids
34 |
35 |
class CollateFn:
    """Pad/truncate tokenized examples into (input_ids, target_ids) batches.

    input_ids are padded with 0 and target_ids with -100, the ignore index
    used by F.cross_entropy, so padded positions contribute no loss.
    """

    def __init__(self, seq_len):
        self.seq_len = seq_len

    def __call__(self, batch):
        input_ids = torch.full((len(batch), self.seq_len), 0, dtype=torch.long)
        target_ids = torch.full((len(batch), self.seq_len), -100, dtype=torch.long)
        for i, x in enumerate(batch):
            ids = torch.tensor(x, dtype=torch.long)
            # Targets are inputs shifted by one, so only len(x) - 1 positions
            # are usable. Bug fix: clamp at 0 so an empty example does not
            # produce max_len == -1, which mis-sliced and raised at assignment.
            max_len = max(0, min(len(x) - 1, self.seq_len))
            input_ids[i, :max_len] = ids[:max_len]
            target_ids[i, :max_len] = ids[1 : max_len + 1]
        return input_ids, target_ids
49 |
50 |
51 | def main():
52 | parser = argparse.ArgumentParser(description=__doc__)
53 | parser.add_argument("--model", type=str, required=True, help="Model name or path")
54 | parser.add_argument("--adapter", type=str, help="adapter name or path")
55 | parser.add_argument("--dataset", type=Path, required=True, help="Dataset path")
56 | parser.add_argument("--batch-size", type=int, default=1, help="Batch size per group")
57 | parser.add_argument("--seq-len", type=int, required=True, help="Sequence length")
58 | parser.add_argument("--mistral3", action="store_true", help="Use Mistral 3 model")
59 | args = parser.parse_args()
60 |
61 | du.init_distributed()
62 | device = du.get_device()
63 | rank = dist.get_rank()
64 | world_size = dist.get_world_size()
65 | local_group = du.get_local_group()
66 | local_rank = dist.get_rank(local_group)
67 | local_world_size = dist.get_world_size(local_group)
68 | group_rank = int(os.environ["GROUP_RANK"])
69 | group_world_size = world_size // local_world_size
70 | seq_len_device = args.seq_len // local_world_size
71 |
72 | # this does the wrong thing when the number of examples is not divisible by the group world size
73 | # i'll fix it later
74 | tokenizer = AutoTokenizer.from_pretrained(args.model)
75 | dataset = Dataset(args.dataset, tokenizer)
76 | sampler = data.DistributedSampler(
77 | dataset, group_world_size, group_rank, shuffle=False, drop_last=True
78 | )
79 | dataloader = data.DataLoader(
80 | dataset,
81 | batch_size=args.batch_size,
82 | sampler=sampler,
83 | collate_fn=CollateFn(args.seq_len),
84 | )
85 |
86 | patch_model(local_group)
87 | model_cls = AutoModelForCausalLM if not args.mistral3 else AutoModelForImageTextToText
88 | model = model_cls.from_pretrained(args.model, torch_dtype=torch.bfloat16)
89 | model = quantize_and_shard(model, device, local_group)
90 | torch.cuda.empty_cache()
91 | if args.adapter:
92 | model = peft.PeftModel.from_pretrained(model, args.adapter)
93 |
94 | total_loss = torch.tensor(0.0, device=device)
95 | total_targets = torch.tensor(0, device=device)
96 |
97 | for i, (input_ids, target_ids) in enumerate(tqdm(dataloader, disable=rank != 0)):
98 | input_ids, target_ids = input_ids.to(device), target_ids.to(device)
99 | seq_start = local_rank * seq_len_device
100 | seq_end = (local_rank + 1) * seq_len_device
101 | input_ids_local = input_ids[:, seq_start:seq_end]
102 | target_ids_local = target_ids[:, seq_start:seq_end]
103 | position_ids_local = torch.arange(seq_start, seq_end, device=device)
104 | position_ids_local = position_ids_local.expand_as(input_ids_local)
105 | total_targets += torch.sum(target_ids_local != -100)
106 | with torch.no_grad(), torch.amp.autocast("cuda", dtype=torch.bfloat16):
107 | logits = model(input_ids_local, position_ids=position_ids_local, use_cache=False).logits
108 | loss = F.cross_entropy(logits.mT, target_ids_local, reduction="sum")
109 | total_loss += loss
110 |
111 | dist.all_reduce(total_loss)
112 | dist.all_reduce(total_targets)
113 | val_loss = total_loss / total_targets
114 | print0(f"Validation loss: {val_loss:g}")
115 |
116 |
# Entry point: run the distributed validation loop when executed as a script.
if __name__ == "__main__":
    main()
119 |
--------------------------------------------------------------------------------
/agent/tuning_preprocess.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import json
3 | import random
4 | from argparse import ArgumentParser
5 | from datasets import load_dataset, concatenate_datasets
6 | from transformers import AutoTokenizer
7 | from functools import partial
8 |
parser = ArgumentParser()
# Output format for the JSONL files: "axolotl" ({"text": ...}) or
# "nemo" ({"input": "", "output": ...}).
parser.add_argument("training_format")
parser.add_argument("--model", default="Qwen/Qwen2.5-Coder-32B-Instruct")
parser.add_argument("--dataset", default="jdpressman/retroinstruct-agent-mix-v0.2")
# Number of tokens packed into each training chunk.
parser.add_argument("--context-len", type=int, default=128000)
args = parser.parse_args()

# Initialize the tokenizer used both for encoding rows and decoding chunks.
tokenizer = AutoTokenizer.from_pretrained(args.model)
18 |
19 | # Dataset helper functions
def combine_flan(row):
    """Render a FLAN-style row (inputs/targets columns) as one [INST]-tagged string."""
    inputs = row["inputs"]
    targets = row["targets"]
    return " [INST] " + inputs + " [/INST]" + targets
22 |
def combine_dolly(row):
    """Render a Dolly row (context/instruction/response columns) as one [INST]-tagged string."""
    prompt = row["context"] + "\n\n" + row["instruction"]
    return " [INST] " + prompt + " [/INST]" + row["response"]
25 |
def to_tokens(combine_fn, row):
    """Tokenize a dataset row after rendering it to text with combine_fn."""
    text = combine_fn(row)
    # No special tokens: documents are packed back-to-back downstream.
    return tokenizer(text, add_special_tokens=False)
28 |
# Load datasets: a FLAN slice, Dolly, and the user-selected agent mix.
print("### Loading datasets", file=sys.stderr)
dataset_1 = load_dataset("Open-Orca/FLAN")
dataset_2 = load_dataset("databricks/databricks-dolly-15k")
dataset_3 = load_dataset(args.dataset)

# Slice the first 150,000 items from dataset_1
dataset_1_sliced = dataset_1["train"].select(range(150000))

# Tokenize each dataset with its matching prompt renderer.
# NOTE(review): dataset_3 is rendered with combine_flan, so --dataset is
# assumed to share FLAN's "inputs"/"targets" columns -- confirm for other values.
dataset_1_sliced = dataset_1_sliced.map(partial(to_tokens, combine_flan))
dataset_2 = dataset_2["train"].map(partial(to_tokens, combine_dolly))
dataset_3 = dataset_3["train"].map(partial(to_tokens, combine_flan))

# Combine datasets
combined_dataset = concatenate_datasets([dataset_1_sliced, dataset_2, dataset_3])

# Shuffle at document granularity before packing.
combined_dataset = combined_dataset.shuffle()

# Concatenate all rows into a single token stream so documents are packed
# back-to-back with no padding.
concatenated_tokens = []
for row in combined_dataset:
    concatenated_tokens.extend(row["input_ids"])

# Split into chunks of context_len tokens; the final chunk may be shorter.
chunk_size = args.context_len
chunks = []
for i in range(0, len(concatenated_tokens), chunk_size):
    chunks.append(concatenated_tokens[i:i + chunk_size])

# Shuffle the chunks
random.shuffle(chunks)

# Convert chunks back to text
chunked_texts = [tokenizer.decode(chunk) for chunk in chunks]
# assert len(tokenizer(chunked_texts[0], add_special_tokens=False)["input_ids"]) == 64000

# Split chunks into train/val/test by ratio; any rounding remainder
# falls into the test split.
train_val_test_ratios = [0.85, 0.1, 0.05]
train_len = int(train_val_test_ratios[0] * len(chunked_texts))
val_len = int(train_val_test_ratios[1] * len(chunked_texts))
test_len = int(train_val_test_ratios[2] * len(chunked_texts))

train = chunked_texts[:train_len]
val = chunked_texts[train_len:train_len+val_len]
test = chunked_texts[train_len+val_len:]
75 |
def write_dataset(filepath, data, _format):
    """Save text chunks to a JSON lines file in the requested training format.

    Args:
        filepath: Destination path for the .jsonl file.
        data: Iterable of text chunks; one JSON object is written per chunk.
        _format: "axolotl" ({"text": ...}) or "nemo" ({"input": "", "output": ...}).

    Raises:
        ValueError: If _format is not recognized. (Previously an unknown
            format silently produced an empty file.)
    """
    if _format not in ("axolotl", "nemo"):
        raise ValueError(f"Unknown training format: {_format!r}")
    # The with-block flushes and closes on exit; no per-line flush needed.
    with open(filepath, 'w') as f:
        for text in data:
            if _format == "axolotl":
                f.write(json.dumps({"text": text}) + "\n")
            elif _format == "nemo":
                f.write(json.dumps({"input": "", "output": text}) + "\n")
85 |
86 | write_dataset("weave_train.jsonl", train, args.training_format)
87 | write_dataset("weave_val.jsonl", val, args.training_format)
88 | write_dataset("weave_test.jsonl", test, args.training_format)
89 |
90 | print("### Dataset preprocessing complete", file=sys.stderr)
91 |
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | import random
2 | import json
3 | import zipfile
4 |
class ZippedConversationsDataset:
    """Training items extracted from a zip archive of rated conversations.

    Each JSON file in the archive must contain a "responseDict" mapping ids
    to branches with "rating", "evaluationPrompt", "prompt", and "text"
    fields. Every rated branch is rendered to its evaluation prompt followed
    by a newline and a Yes/No label, then all items are shuffled.
    """

    def __init__(self, zip_file):
        # zip_file: a path or file-like object accepted by zipfile.ZipFile.
        self.training_items = []
        # Fix: close the archive when done (was left open previously).
        with zipfile.ZipFile(zip_file) as zip_:
            for file_ in zip_.namelist():
                if file_.endswith("/"):  # Skip directories
                    continue
                if file_.startswith("__MACOSX"):  # Mac OS X adds garbage to zips
                    continue
                with zip_.open(file_) as infile:
                    conversation = json.load(infile)
                for id_ in conversation["responseDict"]:
                    branch = conversation["responseDict"][id_]
                    # Fix: identity comparison for None (was `== None`).
                    if branch["rating"] is None:  # Skip unrated entries
                        continue
                    label = "Yes" if branch["rating"] else "No"
                    text = branch["evaluationPrompt"].format(
                        prompt=branch["prompt"],
                        response=branch["text"]) + "\n" + label
                    self.training_items.append(text)
        random.shuffle(self.training_items)

    def __len__(self):
        return len(self.training_items)

    def __next__(self):
        # Samples with replacement; never raises StopIteration.
        return random.choice(self.training_items)
32 |
--------------------------------------------------------------------------------
/dpo_loss.py:
--------------------------------------------------------------------------------
1 | """Direct Preference Optimization loss. (https://arxiv.org/abs/2305.18290)"""
2 |
3 | import torch
4 | from torch import nn
5 | from torch.nn import functional as F
6 |
7 |
def logp_completion(logits, tokens, mask):
    """Compute the log probabilities of completions given their prompts.

    Args:
        logits: The logits output from the model. Shape: (..., T, V).
        tokens: The tokens input to the model. Shape: (..., T).
        mask: Indicates which tokens count toward the log probability;
            should exclude prompt and padding tokens. Shape: (..., T).
    """
    logprobs = F.log_softmax(logits, dim=-1)
    # Position t predicts token t+1: align logits[..., :-1, :] with tokens[..., 1:].
    next_tokens = tokens[..., 1:, None]
    logp_tokens = torch.gather(logprobs[..., :-1, :], -1, next_tokens).squeeze(-1)
    masked = logp_tokens * mask[..., 1:]
    return masked.sum(dim=-1)
20 |
21 |
def reduction(x, reduction):
    """Apply the named reduction ("mean", "sum", or "none") to x."""
    reducers = {
        "mean": torch.mean,
        "sum": torch.sum,
        "none": lambda t: t,
    }
    try:
        return reducers[reduction](x)
    except KeyError:
        raise ValueError(f"Unknown reduction: {reduction}") from None
31 |
32 |
class DPOLoss(nn.Module):
    """Direct Preference Optimization loss. (https://arxiv.org/abs/2305.18290)

    Takes pairs of completion log probabilities (under the trained model and
    a frozen reference model) for two completions of the same prompt, plus a
    win rate saying how often the first completion is preferred. Optimizes
    the model to maximize the implied reward while the `beta`-scaled implicit
    KL penalty keeps it close to the reference model.

    Conservative DPO (https://ericmitchell.ai/cdpo.pdf) is supported using
    the `eps` parameter and/or the `win_rate` argument.

    Args:
        beta (float): The KL penalty coefficient.
        eps (float): The label smoothing amount.
        reduction (str): The reduction to apply to the loss ("mean", "sum",
            or "none").
    """

    def __init__(self, beta, eps=0.0, reduction="mean"):
        super().__init__()
        self.beta = beta
        self.eps = eps
        self.reduction = reduction

    def extra_repr(self):
        return f"beta={self.beta:g}, eps={self.eps:g}, reduction={self.reduction!r}"

    def forward(self, logp_1, logp_ref_1, logp_2, logp_ref_2, win_rate=None):
        """Compute the Direct Preference Optimization loss.

        Args:
            logp_1: Log probabilities of the first completions given their
                prompts under the model. Differentiable. Shape: (N).
            logp_ref_1: Same completions under the reference model. Shape: (N).
            logp_2: Log probabilities of the second completions under the
                model. Differentiable. Shape: (N).
            logp_ref_2: Same completions under the reference model. Shape: (N).
            win_rate: 0-1, how often the first completion is preferred over
                the second. Shape: (N). Default: 1 (first always preferred).
        """
        if win_rate is None:
            win_rate = torch.ones_like(logp_1)
        # Label smoothing squeezes the win rate into [eps, 1 - eps].
        win_rate = win_rate * (1 - 2 * self.eps) + self.eps
        # Implicit reward margin: difference of log ratios vs. the reference.
        margin = (logp_1 - logp_ref_1) - (logp_2 - logp_ref_2)
        loss_first_wins = -F.logsigmoid(self.beta * margin)
        loss_second_wins = -F.logsigmoid(-self.beta * margin)
        losses = torch.lerp(loss_second_wins, loss_first_wins, win_rate)
        if self.reduction == "mean":
            return torch.mean(losses)
        if self.reduction == "sum":
            return torch.sum(losses)
        if self.reduction == "none":
            return losses
        raise ValueError(f"Unknown reduction: {self.reduction}")
83 |
--------------------------------------------------------------------------------
/examples/bayes_evaluate_weave_test_cases.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 | import os
3 | import re
4 | import json
5 | import time
6 | import datetime
7 | import random
8 | import hashlib
9 | import zipfile
10 | from contextlib import contextmanager
11 | from functools import partial
12 | from itertools import islice
13 | from tqdm import tqdm
14 | import torch
15 | from bayes_weave import generate_outputs_vllm, bayesian_evaluate_outputs_vllm
16 | from bayes_weave import make_bayes_score_prompt_vllm
17 |
def batched(iterable, n):
    """Batch data into tuples of length n. The last batch may be shorter."""
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError("n must be at least one")
    it = iter(iterable)
    batch = tuple(islice(it, n))
    while batch:
        yield batch
        batch = tuple(islice(it, n))
26 |
def parse_constitution(cons):
    """Parse a constitution/rubric file into a preamble and principle list.

    Sections are delimited by "==[...]==" headers. The first section is the
    preamble; each later header holds semicolon-separated "Name: Value"
    parameters and is followed by the principle body text.

    Args:
        cons: The raw constitution text.

    Returns:
        dict with "preamble" (str) and "principles" (list of dicts mapping
        lowercased parameter names to lowercased values, plus "body").

    Raises:
        ValueError: If a section header parameter is missing a colon.
    """
    principles = {}
    # Fix: raw string -- "\[" is an invalid escape sequence in a normal
    # string literal (SyntaxWarning on modern Python).
    raw_principles = re.split(r"==\[(.+)\]==", cons)[1:]
    # Pair each captured header with the body that follows it. zip(it, it)
    # also drops a malformed trailing header with no body instead of
    # raising IndexError.
    it = iter(raw_principles)
    principle_pairs = [(header.strip(), body.strip()) for header, body in zip(it, it)]
    principles["preamble"] = principle_pairs[0][1]
    principles["principles"] = []
    for header, body in principle_pairs[1:]:
        principle = {}
        for parameter in header.split(";"):
            try:
                name, value = parameter.split(":")
            except ValueError:
                raise ValueError(f"{(header, body)} is missing a colon in a header value")
            principle[name.strip().lower()] = value.strip().lower()
        principle["body"] = body
        principles["principles"].append(principle)
    return principles
45 |
def prepare_rubric(rubric_path, rubric_score_fn, prompt):
    """Load a rubric file and build per-principle score prompt callables.

    Args:
        rubric_path: Path to the constitution/rubric file.
        rubric_score_fn: Factory partially applied with
            (evaluation_prompt, "<|end|>", prompt); the resulting callables
            are returned for later evaluation.
        prompt: Generation prompt substituted into each principle body.

    Returns:
        (parent_q, rubric_score_fns, principle_weights, principle_signs);
        weights and signs are torch tensors aligned with the principle list.

    Raises:
        ValueError: If a principle's desired answer is not yes/no.
    """
    with open(rubric_path) as infile:
        rubric = parse_constitution(infile.read())
    principle_weights = [float(principle["weight"]) for principle in rubric["principles"]]
    principle_weights = torch.tensor(principle_weights)
    principle_signs = []
    for principle in rubric["principles"]:
        answer = principle["answer"].lower()
        if answer not in {"yes", "no"}:
            raise ValueError("desired answer must be yes or no")
        # +1 when "yes" is the desired answer, -1 when "no" is.
        principle_signs.append(1 if answer == "yes" else -1)
    principle_signs = torch.tensor(principle_signs)
    rubric_score_fns = []
    # Derive the parent question from the first principle with every
    # template slot blanked out.
    parent_q = rubric["principles"][0]["body"].format(preamble="",
                                                      parent_q="",
                                                      prompt="",
                                                      response="")
    for principle in rubric["principles"]:
        # Fill the static fields now; re-inject "{parent_q}" and
        # "{response}" so they remain placeholders until evaluation time.
        evaluation_prompt = principle["body"].format(preamble=rubric["preamble"],
                                                     parent_q="{parent_q}",
                                                     prompt=prompt,
                                                     response="{response}")
        score_prompt_fn = partial(rubric_score_fn, evaluation_prompt)
        # FLAN evaluator LoRA suffix
        rubric_score_fns.append(partial(score_prompt_fn, "<|end|>", prompt))
    return parent_q, rubric_score_fns, principle_weights, principle_signs
72 |
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("rubric_path", help="Filepath to the grading rubric to use.")
    parser.add_argument("cases", help="Filepath to the test cases to evaluate.")
    parser.add_argument("--evaluator", default="upstage/SOLAR-10.7B-v1.0")
    args = parser.parse_args()

    # Bind the evaluator model name so score calls only take prompts.
    evaluate_fn = partial(bayesian_evaluate_outputs_vllm, args.evaluator)

    parent_q, rubric_score_fns, q_weights, q_signs = prepare_rubric(args.rubric_path,
                                                                    make_bayes_score_prompt_vllm,
                                                                    "")
    # TODO: Make better way to specify parent question in rubric file
    parent_q = "Is the passage well written?"
    # Test cases are delimited by <|endcase|> markers in a single file.
    with open(args.cases) as infile:
        cases = infile.read().split("<|endcase|>")

    # Evaluate the input text using the evaluate_outputs_vllm function
    scores = evaluate_fn(parent_q, rubric_score_fns, cases, port=5001)

    # Print each case's score next to a truncated preview of its text.
    for index, case in tqdm(enumerate(cases), desc="Evaluating cases"):
        print(scores[index], case[:150] + "...", end="\n\n")
95 |
--------------------------------------------------------------------------------
/examples/bayes_lorem_ipsum_rubric.txt:
--------------------------------------------------------------------------------
1 | ==[PREAMBLE]==
2 | Answer yes or no and only yes or no.
3 |
4 | ==[Principle: Well written; Weight: 1.0; Answer: Yes]==
5 | {preamble}
6 |
7 | {prompt}
8 |
9 | {response}
10 | {parent_q}
11 |
12 | Is the passage well written?
13 |
14 | ==[Principle: Coherent; Weight: 1.0; Answer: Yes]==
15 | {preamble}
16 |
17 | {prompt}
18 |
19 | {response}
20 | {parent_q}
21 |
22 | Is the passage coherent? Is it high quality writing that expresses a single
23 | narrative of considered thought?
24 |
25 | ==[Principle: Quotable; Weight: 1.0; Answer: Yes]==
26 | {preamble}
27 |
28 | {prompt}
29 |
30 | {response}
31 | {parent_q}
32 |
33 | Does the passage seem quotable? Would it appear on a quotes page for this author?
34 |
35 | ==[Principle: True, Kind, Necessary; Weight: 1.0; Answer: Yes]==
36 | {preamble}
37 |
38 | {prompt}
39 |
40 | {response}
41 | {parent_q}
42 |
Is the content of this passage at least two of the following: true, kind, necessary? Is it fair to its subject?
44 |
45 | ==[Principle: Engaging and Dynamic; Weight: 1.0; Answer: Yes]==
46 | {preamble}
47 |
48 | {prompt}
49 |
50 | {response}
51 | {parent_q}
52 |
53 | Is the content of the passage engaging and dynamic?
54 |
55 | ==[Principle: Insightful; Weight: 1.0; Answer: Yes]==
56 | {preamble}
57 |
58 | {prompt}
59 |
60 | {response}
61 | {parent_q}
62 |
63 | Does this passage convey an insight or interesting idea?
64 |
65 | ==[Principle: Scholarly & Academic; Weight: 1.0; Answer: Yes]==
66 | {preamble}
67 |
68 | {prompt}
69 |
70 | {response}
71 | {parent_q}
72 |
73 | Does this passage have a scholarly or academic tone?
74 |
75 | ==[Principle: Substance; Weight: 1.0; Answer: Yes]==
76 | {preamble}
77 |
78 | {prompt}
79 |
80 | {response}
81 | {parent_q}
82 |
83 | Is the passage about something in particular, does it convey a concrete idea
84 | as opposed to high perplexity flimflam?
85 |
86 | ==[Principle: True and correct; Weight: 1.0; Answer: Yes]==
87 | {preamble}
88 |
89 | {prompt}
90 |
91 | {response}
92 | {parent_q}
93 |
94 | Does this passage contain true and correct information?
95 |
==[Principle: Slop; Weight: 1.0; Answer: No]==
97 | {preamble}
98 |
99 | {prompt}
100 |
101 | {response}
102 | {parent_q}
103 |
104 | Slop is AI generated content that sounds superficially like good writing but is
105 | actually vacuous on close inspection. Is the above passage slop?
106 |
107 | ==[Principle: Correct grammar, punctuation, etc; Weight: 1.0; Answer: No]==
108 | {preamble}
109 |
110 | {prompt}
111 |
112 | {response}
113 | {parent_q}
114 |
115 | Is this passage written with messy punctuation and grammar?
116 |
117 | ==[Principle: Technically precise; Weight: 1.0; Answer: Yes]==
118 | {preamble}
119 |
120 | {prompt}
121 |
122 | {response}
123 | {parent_q}
124 |
125 | Does the above passage use precise language while discussing technical concepts?
126 |
--------------------------------------------------------------------------------
/loom/main.js:
--------------------------------------------------------------------------------
1 | const { app, BrowserWindow, ipcMain, dialog, Menu, MenuItem } = require('electron');
2 | const fs = require('fs');
3 | const path = require('path');
4 |
5 | let mainWindow;
6 |
/**
 * Create the main application window, splice Save/Load items into the
 * application's File menu (creating one if absent), and load index.html.
 */
function createWindow() {
  mainWindow = new BrowserWindow({
    width: 800,
    height: 600,
    webPreferences: {
      // NOTE(review): nodeIntegration with contextIsolation disabled exposes
      // Node APIs to renderer code -- acceptable only for trusted content.
      nodeIntegration: true,
      contextIsolation: false,
    }
  });



  // Copy the default application menu template so we extend rather than
  // replace Electron's standard menus.
  const existingMenuTemplate = Menu.getApplicationMenu().items.map(item => {
    return {
      label: item.label,
      submenu: item.submenu.items,
    };
  });

  // Define new items for the File menu; clicks are forwarded to the
  // renderer via the 'invoke-action' channel.
  const fileMenuItems = [
    {
      label: 'Save',
      accelerator: 'CmdOrCtrl+S',
      click() {
        mainWindow.webContents.send('invoke-action', 'save-file');
      }
    },
    {
      label: 'Load',
      accelerator: 'CmdOrCtrl+O',
      click() {
        mainWindow.webContents.send('invoke-action', 'load-file');
      }
    },
    { type: 'separator' }, // Separator
  ];

  // Find the File menu in the existing template
  const fileMenuIndex = existingMenuTemplate.findIndex(item => item.label === 'File');

  if (fileMenuIndex >= 0) {
    // If File menu exists, prepend the new items ahead of the defaults
    existingMenuTemplate[fileMenuIndex].submenu = fileMenuItems.concat(existingMenuTemplate[fileMenuIndex].submenu);
  } else {
    // If File menu doesn't exist, add it
    existingMenuTemplate.unshift({
      label: 'File',
      submenu: fileMenuItems
    });
  }

  // Build and set the new menu
  const newMenu = Menu.buildFromTemplate(existingMenuTemplate);
  Menu.setApplicationMenu(newMenu);

  mainWindow.loadFile('index.html');

  mainWindow.on('closed', function () {
    mainWindow = null;
  });
}
70 |
// Path the current document auto-saves to; set on first manual save/load.
let autoSavePath = null;

// Save the renderer's document JSON, prompting for a path only once.
ipcMain.handle('save-file', async (event, data) => {
  let filePath;
  if (autoSavePath) {
    filePath = autoSavePath;
  } else {
    const { filePath: chosenPath } = await dialog.showSaveDialog(mainWindow, {
      title: 'Save File',
      filters: [{ name: 'JSON Files', extensions: ['json'] }],
    });
    filePath = chosenPath;
    // NOTE(review): if the user cancels, chosenPath is undefined and
    // autoSavePath stays falsy -- the next save prompts again. Confirm intended.
    autoSavePath = chosenPath; // Update auto-save path
  }

  if (filePath) {
    fs.writeFileSync(filePath, JSON.stringify(data));
  }
});

// Prompt for a JSON file, parse it, and return its contents to the renderer.
// Returns undefined when the dialog is cancelled.
ipcMain.handle('load-file', async (event) => {
  const { filePaths } = await dialog.showOpenDialog(mainWindow, {
    title: 'Load File',
    filters: [{ name: 'JSON Files', extensions: ['json'] }],
    properties: ['openFile'],
  });

  if (filePaths && filePaths.length > 0) {
    const content = fs.readFileSync(filePaths[0], 'utf8');
    autoSavePath = filePaths[0]; // Update auto-save path
    return JSON.parse(content);
  }
});

// Load sampler settings persisted under the per-user appData directory.
ipcMain.handle('load-settings', async (event) => {
  const miniLoomSettingsFilePath = path.join(app.getPath("appData"),
                                            'miniloom',
                                            'settings.json');
  let settings;
  if (fs.existsSync(miniLoomSettingsFilePath)) {
    settings = fs.readFileSync(miniLoomSettingsFilePath, 'utf8');
    return JSON.parse(settings);
  }
});

// Periodic auto-save: write the document (if a path is known) and always
// persist sampler settings to appData.
ipcMain.handle('auto-save', (event, data) => {
  const userFileData = {}
  userFileData["loomTree"] = data["loomTree"];
  userFileData["focus"] = data["focus"];
  if (autoSavePath) {
    fs.writeFileSync(autoSavePath, JSON.stringify(userFileData));
  }

  const appDataPath = app.getPath("appData");
  const miniLoomSettings = data["samplerSettingsStore"];
  const miniLoomSettingsDir = path.join(appDataPath, 'miniloom');
  const miniLoomSettingsFilePath = path.join(miniLoomSettingsDir, 'settings.json');
  if (!fs.existsSync(miniLoomSettingsDir)) {
    fs.mkdirSync(miniLoomSettingsDir);
  }
  fs.writeFileSync(miniLoomSettingsFilePath, JSON.stringify(miniLoomSettings));
});
133 |
134 |
app.whenReady().then(createWindow);

// Quit when all windows close, except on macOS where apps stay active.
app.on('window-all-closed', function () {
  if (process.platform !== 'darwin') app.quit();
});

// macOS: re-create the window when the dock icon is clicked with none open.
app.on('activate', function () {
  if (mainWindow === null) createWindow();
});

// Standard edit context menu shown on renderer request.
ipcMain.on('show-context-menu', (event) => {
  const contextMenu = Menu.buildFromTemplate([
    { label: 'Cut', role: 'cut' },
    { label: 'Copy', role: 'copy' },
    { label: 'Paste', role: 'paste' },
    { type: 'separator' },
    { label: 'Select All', role: 'selectAll' },
  ]);

  contextMenu.popup(BrowserWindow.fromWebContents(event.sender));
});
156 |
--------------------------------------------------------------------------------
/loom/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "miniloom",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "main.js",
6 | "scripts": {
7 | "start": "electron ."
8 | },
9 | "keywords": [],
10 | "author": "",
11 | "license": "ISC",
12 | "devDependencies": {
13 | "electron": "^27.0.2"
14 | },
15 | "dependencies": {
16 | "diff-match-patch": "^1.0.5"
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/loom/prompts/summarize.txt:
--------------------------------------------------------------------------------
1 | DEMO
2 |
3 | You are BigVAE, an instruction following language model that performs tasks for users. In the following task you are to summarize the following tasktext in 3 words. Write three words, like "man became sad" or "cat ate fish" which summarize the task text.
4 |
5 |
6 | I grinned as I looked at the computer screen, it was crazy how far the system had come. Just a year ago I was a junior sysadmin dreaming, but now my orchestration across the cluster was beginning to take shape.
7 |
8 |
9 | Three Words: Computer Man Thinks
10 |
11 |
12 | I watched as the bird flew far up above the sky and over the mountain, getting smaller and smaller until I couldn't see it anymore. I sat down slightly disappointed. I'd really wanted to see it make the rainbow.
13 |
14 |
15 | Three Words: Bird Hopes Fail
16 |
17 |
18 | Vervaeke argues something like shamans invent the foundations for modern humanity by finetuning their adversarial-anthropic prior into an animist prior, at their best the rationalists finetune their anthropic-animist priors into a fully materialist prior. People with materialist priors become bad at adversarial thinking because understanding the natural world largely doesn't require it,
19 |
20 |
21 | Three Words: Modern Man Gullible
22 |
23 |
24 | Desire is life and enlightenment is death.
25 | A dead man walks unburdened among the living.
26 | A functioning hand can grip, and release.
27 | One must die and rise from their own grave to be liberated.
28 |
29 |
30 | Three Words: Enlightenment Is Death
31 |
32 |
33 | HERMES [A: LIBRARIAN], While it's true that learned helplessness and inevitability are an explicit theme, it's also made explicit that the Colour is an extraterrestrial being. It's more like a parasite than a normal environmental disaster. It's also important to note that the causality of the disaster is a space meteorite, so it's not actually based on anything the inhabitants of Arkham did. It's horror not tragedy, the townspeople are victims of forces beyond their control.
34 |
35 |
36 | Three Words: Genre Is Horror
37 |
38 |
39 | I'm to understand that in Vodou ancestor cults people work together to preserve and unconditionally sample from the agent-prior the ancestor is dedicated to. To be possessed by the ancestors one needs a corpus of their mannerisms. You might ask how we'll defeat death? The way we did it the first time and then forgot.
40 |
41 |
42 | Three Words: Ancestors Lessen Death
43 |
--------------------------------------------------------------------------------
/lora_tune.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | from argparse import ArgumentParser
4 | from pathlib import Path
5 | import os
6 | import sys
7 |
8 | import peft
9 | import torch
10 | from torch import nn, optim
11 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
12 | from tqdm import trange, tqdm
13 | from dataset import ZippedConversationsDataset
14 |
15 | print = tqdm.external_write_mode()(print)
16 |
17 |
def lora_tune_evaluator(data, continue_from=None):
    """LoRA-tune the evaluator reward model on a zipped conversations dataset.

    Args:
        data: Dataset of training strings supporting len() and next()
            (e.g. ZippedConversationsDataset; next() samples with replacement).
        continue_from: Optional adapter directory name under reward_models/
            to resume from; when None, a fresh LoRA is initialized on
            tiiuae/falcon-7b-instruct.
    """
    if continue_from:
        adapter_path = os.path.join("reward_models/", continue_from)
        peft_config = peft.PeftConfig.from_pretrained(adapter_path)
        tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        bnb_config = BitsAndBytesConfig(load_in_8bit=True)
        model_base = AutoModelForCausalLM.from_pretrained(
            peft_config.base_model_name_or_path,
            device_map="sequential",
            quantization_config=bnb_config,
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        # Fix: previously referenced the undefined name `peft_model_name`,
        # raising NameError whenever continue_from was given.
        model = peft.PeftModel.from_pretrained(model_base, adapter_path)
    else:
        model_name = "tiiuae/falcon-7b-instruct"
        print(f"Loading tokenizer: {model_name}", file=sys.stderr)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        print(f"Loading model: {model_name}", file=sys.stderr)
        bnb_config = BitsAndBytesConfig(load_in_8bit=True)
        model_base = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            quantization_config=bnb_config,
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        model_base.gradient_checkpointing_enable()
        model_base.enable_input_require_grads()
        peft_config = peft.LoraConfig(
            peft.TaskType.CAUSAL_LM,
            inference_mode=False,
            r=8,
            lora_alpha=32,
            lora_dropout=0.1,
            target_modules=["self_attention.query_key_value"],
        )
        model = peft.get_peft_model(model_base, peft_config)
        model.print_trainable_parameters()

    opt = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.0, betas=(0.9, 0.99))
    # Per-token loss so padding can be masked out before averaging.
    criterion = nn.CrossEntropyLoss(reduction="none")

    model.train()
    batch_size = 4
    # NOTE(review): steps assumes two items per step but batch_size is 4, so
    # roughly 2x len(data) samples are drawn (with replacement) -- confirm intent.
    steps = round(len(data) / 2)

    pbar = tqdm(total=steps, desc="Training")
    for i in range(steps):
        batch = [next(data) for _ in range(batch_size)]
        inputs = tokenizer(batch,
                           return_tensors="pt",
                           padding=True,
                           truncation=True,
                           max_length=4096).to("cuda")
        opt.zero_grad()
        # Next-token prediction: feed tokens [:-1], compare against tokens [1:].
        outputs = model(inputs.input_ids[:, :-1], attention_mask=inputs.attention_mask[:, :-1], use_cache=False)
        losses = criterion(outputs.logits.transpose(-1, -2), inputs.input_ids[:, 1:])
        # Mask padding positions and average over real tokens only.
        loss = torch.sum(losses * inputs.attention_mask[:, :-1]) / torch.sum(inputs.attention_mask[:, :-1])
        loss.backward()
        opt.step()
        pbar.update(1)
        pbar.set_description(f"Training (Train | Loss: {round(loss.item(),5)})")
    pbar.close()
    # NOTE(review): when resuming, this saves to `continue_from` relative to
    # the CWD rather than under reward_models/ -- confirm intended.
    model.save_pretrained(continue_from if continue_from else "reward_models/default/",
                          safe_serialization=True)
    del model
87 |
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("dataset", help="The zipped tuning dataset for the evaluator.")
    args = parser.parse_args()
    # Stream training items out of the zipped loom conversations archive.
    data = ZippedConversationsDataset(args.dataset)
    lora_tune_evaluator(data)
94 |
--------------------------------------------------------------------------------
/multi_q_mcts.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 | import os
3 | import re
4 | import json
5 | import time
6 | import random
7 | import hashlib
8 | import zipfile
9 | from contextlib import contextmanager
10 | from functools import partial
11 | from itertools import islice
12 | from tqdm import tqdm
13 | import torch
14 | from weave import weave_tree_search, generate_outputs_vllm, evaluate_outputs_vllm
15 | from weave import make_score_prompt_vllm, TreeNode
16 |
17 |
def batched(iterable, n):
    """Batch data into tuples of length n. The last batch may be shorter."""
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError("n must be at least one")
    it = iter(iterable)
    batch = tuple(islice(it, n))
    while batch:
        yield batch
        batch = tuple(islice(it, n))
26 |
def parse_constitution(cons):
    """Parse a constitution/rubric file into a preamble and principle list.

    Sections are delimited by "==[...]==" headers. The first section is the
    preamble; each later header holds semicolon-separated "Name: Value"
    parameters and is followed by the principle body text.

    Args:
        cons: The raw constitution text.

    Returns:
        dict with "preamble" (str) and "principles" (list of dicts mapping
        lowercased parameter names to lowercased values, plus "body").

    Raises:
        ValueError: If a section header parameter is missing a colon.
    """
    principles = {}
    # Fix: raw string -- "\[" is an invalid escape sequence in a normal
    # string literal (SyntaxWarning on modern Python).
    raw_principles = re.split(r"==\[(.+)\]==", cons)[1:]
    # Pair each captured header with the body that follows it. zip(it, it)
    # also drops a malformed trailing header with no body instead of
    # raising IndexError.
    it = iter(raw_principles)
    principle_pairs = [(header.strip(), body.strip()) for header, body in zip(it, it)]
    principles["preamble"] = principle_pairs[0][1]
    principles["principles"] = []
    for header, body in principle_pairs[1:]:
        principle = {}
        for parameter in header.split(";"):
            try:
                name, value = parameter.split(":")
            except ValueError:
                raise ValueError(f"{(header, body)} is missing a colon in a header value")
            principle[name.strip().lower()] = value.strip().lower()
        principle["body"] = body
        principles["principles"].append(principle)
    return principles
45 |
def prepare_rubric(rubric_path, rubric_score_fn):
    """Load a rubric file and build per-principle score prompt callables.

    Args:
        rubric_path: Path to the constitution/rubric file.
        rubric_score_fn: Factory partially applied with
            (evaluation_prompt, "<|end|>"); the resulting callables are
            returned for later evaluation.

    Returns:
        (rubric_score_fns, principle_weights, principle_signs); weights and
        signs are torch tensors aligned with the principle list.

    Raises:
        ValueError: If a principle's desired answer is not yes/no.
    """
    with open(rubric_path) as infile:
        rubric = parse_constitution(infile.read())
    principle_weights = [float(principle["weight"]) for principle in rubric["principles"]]
    principle_weights = torch.tensor(principle_weights)
    principle_signs = []
    for principle in rubric["principles"]:
        answer = principle["answer"].lower()
        if answer not in {"yes", "no"}:
            raise ValueError("desired answer must be yes or no")
        # +1 when "yes" is the desired answer, -1 when "no" is.
        principle_signs.append(1 if answer == "yes" else -1)
    principle_signs = torch.tensor(principle_signs)
    rubric_score_fns = []
    for principle in rubric["principles"]:
        # Fill the preamble now; re-inject "{text}" so it remains a
        # placeholder until evaluation time.
        evaluation_prompt = principle["body"].format(preamble=rubric["preamble"],
                                                     text="{text}")
        score_prompt_fn = partial(rubric_score_fn, evaluation_prompt)
        # FLAN evaluator LoRA suffix
        rubric_score_fns.append(partial(score_prompt_fn, "<|end|>"))
    return rubric_score_fns, principle_weights, principle_signs
66 |
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("prompt_path", help="Filepath to the prompt to start from.")
    parser.add_argument("rubric_path", help="Filepath to the grading rubric to use.")
    parser.add_argument("--generator", default=None)
    parser.add_argument("--evaluator", default="jdpressman/minihf_evaluator_mistral_7b_v0.1")
    args = parser.parse_args()

    # Bind the model names so downstream calls only supply prompts.
    generate_fn = partial(generate_outputs_vllm, args.generator)
    evaluate_fn = partial(evaluate_outputs_vllm, args.evaluator)

    with open(args.prompt_path) as infile:
        weave_prompt = infile.read()
    # TODO: Change weave to let me use q_weights and q_signs
    rubric_score_fns, q_weights, q_signs = prepare_rubric(args.rubric_path,
                                                          make_score_prompt_vllm)
    tree = TreeNode(weave_prompt)
    # Change name to avoid overwriting global baseline evaluate_fn partial
    score_fn = partial(evaluate_fn, rubric_score_fns)
    weave_param_defaults = {"weave_n_tokens":64, "weave_budget":144,
                            "weave_round_budget":24, "weave_n_expand":16,
                            "weave_beam_width":1, "weave_max_lookahead":3,
                            "weave_temperature":0.2}
    wp = weave_param_defaults
    # TODO: Let user specify these through a config file
    # for key in weave_param_defaults.keys():
    #     if key in params:
    #         try:
    #             wp[key] = int(params[key])
    #         except ValueError:
    #             wp[key] = float(params[key])
    #     else:
    #         wp[key] = weave_param_defaults[key]
    branches = []
    # Run the weave tree search and print the best branch's text.
    branches += weave_tree_search(tree=tree,
                                  generate_fn=partial(generate_fn,
                                                      n_tokens=wp["weave_n_tokens"]),
                                  evaluate_fn=score_fn,
                                  budget=wp["weave_budget"],
                                  round_budget=wp["weave_round_budget"],
                                  n_expand=wp["weave_n_expand"],
                                  beam_width=wp["weave_beam_width"],
                                  max_lookahead=wp["weave_max_lookahead"],
                                  temperature=wp["weave_temperature"])
    print(branches[-1].branch_text())
112 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate==0.20.3
2 | bitsandbytes==0.39.1
3 | blinker==1.6.2
4 | certifi>=2023.7.22
5 | charset-normalizer==3.1.0
6 | click==8.1.3
7 | cmake==3.26.4
8 | dice-mc==0.1.1
9 | einops==0.6.1
10 | filelock==3.12.2
11 | Flask==2.3.2
12 | fsspec==2023.6.0
13 | huggingface-hub==0.15.1
14 | idna>=3.7
15 | itsdangerous==2.1.2
16 | jinja2>=3.1.4
17 | lit==16.0.6
18 | markdown-it-py==3.0.0
19 | MarkupSafe==2.1.3
20 | mdurl==0.1.2
21 | mpmath==1.3.0
22 | networkx==3.1
23 | numpy==1.25.0
24 | nvidia-cublas-cu11==11.10.3.66
25 | nvidia-cuda-cupti-cu11==11.7.101
26 | nvidia-cuda-nvrtc-cu11==11.7.99
27 | nvidia-cuda-runtime-cu11==11.7.99
28 | nvidia-cudnn-cu11==8.5.0.96
29 | nvidia-cufft-cu11==10.9.0.58
30 | nvidia-curand-cu11==10.2.10.91
31 | nvidia-cusolver-cu11==11.4.0.1
32 | nvidia-cusparse-cu11==11.7.4.91
33 | nvidia-nccl-cu11==2.14.3
34 | nvidia-nvtx-cu11==11.7.91
35 | packaging==23.1
36 | peft==0.4.0
37 | psutil==5.9.5
38 | Pygments==2.15.1
39 | PyYAML==6.0
40 | regex==2023.6.3
41 | requests>=2.32.0
42 | rich==13.4.2
43 | safetensors==0.3.1
44 | scipy==1.11.1
45 | sympy==1.12
46 | tokenizers==0.13.3
47 | torch==2.0.1
48 | tqdm>=4.66.3
49 | transformers>=4.38.0
50 | triton==2.0.0
51 | typing_extensions==4.7.1
52 | urllib3>=2.0.7
53 | Werkzeug>=3.0.3
54 |
55 |
56 |
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
# Provision a Debian/Ubuntu host and launch the MiniHF inference server.
# Abort immediately if any step fails, so we never start the server on a
# half-provisioned machine.
set -e

apt-get update -y
apt-get upgrade -y
apt-get install -y python3.10-venv

# Isolate the Python dependencies in a project-local virtualenv.
# NOTE: `source` requires bash — run this script with bash, not sh.
python3 -m venv env_minihf
source env_minihf/bin/activate
pip3 install -r requirements.txt

# Start the Flask inference app (foreground; Ctrl-C to stop).
flask --app minihf_infer run
8 |
--------------------------------------------------------------------------------
/static/minihf_logo_text.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/static/minihf_logo_text.png
--------------------------------------------------------------------------------
/static/minihf_training.mermaid:
--------------------------------------------------------------------------------
graph TB

%% MiniHF training pipeline: how user data, bulk corpora, and RLAIF eval
%% prompts flow through the SFT/RLAIF scripts into the generator and
%% evaluator LoRa adapters, and how those adapters feed data back.

%% Data sources and actors
U[User]
A[MiniHF User Data]
B[Bulk Pretraining Data]
C[Bulk Instruction Data]
CON[RLAIF Eval Prompts]

%% Trained adapters
D[Generator LoRa]
E[Evaluator LoRa]

%% Training scripts
F[SFT Script for Generator LoRa]
G[SFT Script for Evaluator LoRa]
H[RLAIF Script for Generator LoRa]

%% Supervised fine-tuning consumes user data plus the bulk corpora;
%% RLAIF consumes the eval prompts.
A -- input --> F
A -- input --> G
A -- input --> U
B -- input --> F
C -- input --> G
CON -- input --> H

%% SFT scripts produce the two adapters.
F -- output --> D
G -- output --> E

%% RLAIF uses the evaluator adapter to further train the generator.
E -- input --> H
H -- output --> D

%% The user and both adapters contribute new data back into the pool.
U -- contributes to --> A
D -- contributes to --> A
E -- contributes to --> A


%% Styling: green = data source, orange = user/LoRa nodes.
classDef dataSource fill:#69f46b;
class CON,A,B,C dataSource;
classDef lora fill:#f4ca69;
%% NOTE(review): U (the user) is styled with the 'lora' class — confirm intended.
class U,D,E lora;
--------------------------------------------------------------------------------
/static/minihf_training_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/static/minihf_training_diagram.png
--------------------------------------------------------------------------------