├── .gitignore ├── LICENSE ├── README.md ├── agent ├── Dockerfile ├── README.md ├── backtrack_stems.txt ├── block_generators.py ├── block_linters.py ├── bootstraps │ ├── agent_django_interface_bootstrap.py │ ├── bootstrap.py │ ├── browser_tool_bootstrap.py │ ├── discord_bootstrap.py │ ├── discord_bootstrap_jdp.py │ ├── dome39_scifi_bootstrap.py │ ├── example_blocks │ │ ├── discord_evaluation1.json │ │ ├── discord_evaluation1.py │ │ ├── discord_observation_inference1.json │ │ ├── discord_observation_inference1.py │ │ ├── discord_observation_inference2.json │ │ ├── discord_observation_inference2.py │ │ ├── discord_orientation1.json │ │ ├── discord_orientation1.py │ │ ├── discord_orientation2.json │ │ ├── discord_orientation2.py │ │ ├── haunted_mansion_orientation1.json │ │ ├── haunted_mansion_orientation1.py │ │ ├── nano_editor1.json │ │ ├── nano_editor1.py │ │ ├── nano_editor_action.json │ │ ├── nano_editor_action.py │ │ ├── nethack_action1.json │ │ ├── nethack_action1.py │ │ ├── nethack_action2.json │ │ ├── nethack_action2.py │ │ ├── nethack_action3.json │ │ ├── nethack_action3.py │ │ ├── nethack_action4.json │ │ ├── nethack_action4.py │ │ ├── nethack_action5.json │ │ ├── nethack_action5.py │ │ ├── nethack_check_inventory.json │ │ ├── nethack_check_inventory.py │ │ ├── nethack_evaluation1.json │ │ ├── nethack_evaluation1.py │ │ ├── nethack_orientation1.json │ │ ├── nethack_orientation1.py │ │ ├── nethack_orientation2.json │ │ ├── nethack_orientation2.py │ │ ├── nethack_orientation3.json │ │ ├── nethack_orientation3.py │ │ ├── nethack_pick_up_item.json │ │ ├── nethack_pick_up_item.py │ │ ├── rabbit_hole_refocus1.json │ │ ├── rabbit_hole_refocus1.py │ │ ├── send_discord_message.json │ │ ├── send_discord_message.py │ │ ├── send_discord_message2.json │ │ ├── send_discord_message2.py │ │ ├── send_discord_message3.json │ │ ├── send_discord_message3.py │ │ ├── weave_editor_append.json │ │ ├── weave_editor_append.py │ │ ├── weave_editor_diff_edit.json │ │ └── 
weave_editor_diff_edit.py │ ├── haunted_mansion_bootstrap.py │ ├── haunted_mansion_bootstrap_emacs.py │ ├── haunted_mansion_bootstrap_nano.py │ ├── nano_exercises │ │ └── prompt.txt │ ├── nano_exercises_bootstrap.py │ ├── nano_practice_bootstrap.py │ ├── nano_practice_bootstrap_easy.py │ ├── nethack_bootstrap.py │ ├── nethack_bootstrap_rooms.py │ ├── nethack_bootstrap_rooms2.py │ ├── nethack_discord_bootstrap.py │ ├── pure_ocaml_bootstrap.py │ ├── simple_convo_bootstrap.py │ ├── sir_rowland_hill_bootstrap.py │ ├── skim_retroinstruct_data_guide.py │ ├── test_tictactoe_server.py │ ├── test_vigenere.py │ ├── tictactoe_bootstrap.py │ ├── tictactoe_server.py │ ├── tictactoe_single_bootstrap.py │ ├── tictactoe_single_discord_bootstrap.py │ ├── vigenere.py │ ├── vigenere_bootstrap.py │ ├── vigenere_bootstrap_nano.py │ ├── wiki_citation_bootstrap.py │ └── zombie_game_bootstrap.py ├── cache_hf.py ├── docs │ ├── 2024-12-29-how-to-write-a-bootstrap-file.md │ └── weave_agent_vs_react.png ├── embed_server.py ├── error_stems.txt ├── eval_rubrics │ ├── action.txt │ ├── backtrack.txt │ ├── debug.txt │ ├── evaluation.txt │ ├── expectation.txt │ ├── observation-inference.txt │ ├── orientation.txt │ └── task-inference.txt ├── long_backtrack_stems.txt ├── planner.py ├── prepare_rl_set_from_traces.py ├── python.lark ├── render_agent_trace.py ├── render_block.py ├── reproduce_vllm_bug_partial_utf8.py ├── requirements.txt ├── retrieval.py ├── run_without_errors_questions.txt ├── scratch.txt ├── sleep.py ├── templates │ ├── describe1.txt │ └── describe2.txt ├── test_block_extractors.py ├── test_block_linters.py ├── test_rl_reward_penalties.py ├── test_weave_kanban.py ├── tools │ ├── atspi_desktop.py │ ├── discord.py │ ├── editor.py │ ├── editor_test.txt │ ├── emacs.py │ ├── fuzz1.txt │ ├── nano.py │ ├── nethack.py │ ├── test_diffs.py │ ├── weave_zombie_game.py │ └── zombie_game.py ├── trainer │ ├── linear_4bit_sharded.py │ ├── linear_4bit_sharded_test.py │ ├── mistral3_ring_attn_test.py │ 
├── mixtral_ring_attn_test.py │ ├── patch_model.py │ ├── qwen2_ring_attn_test.py │ ├── ring_attn.py │ ├── ring_attn_test.py │ ├── trainer.py │ ├── trainer_preference.py │ └── trainer_val_loss.py ├── tuning_preprocess.py ├── weave.py └── weave_agent.py ├── dataset.py ├── diff_match_patch.py ├── dpo_loss.py ├── examples ├── bayes_evaluate_weave_test_cases.py ├── bayes_lorem_ipsum_rubric.txt └── lorem_ipsum_cases.txt ├── find_ae_scale.py ├── hermes ├── hermes_constitution.txt └── hermes_prompts.txt ├── loom ├── index.html ├── main.js ├── package.json ├── prompts │ ├── rewrite.txt │ └── summarize.txt ├── prose_samples.json └── renderer.js ├── loom_tree.py ├── lora_tune.py ├── minihf_infer.py ├── multi_q_mcts.py ├── requirements.txt ├── rlaif_generator.py ├── rlaif_generator_dpo.py ├── setup.sh ├── sft_evaluator.py ├── sft_generator.py ├── static ├── minihf.html ├── minihf_logo_text.png ├── minihf_training.mermaid └── minihf_training_diagram.png ├── train_vae_overlap.py ├── train_vae_router.py ├── vae_infer.py └── weave.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *~ 3 | -------------------------------------------------------------------------------- /agent/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a parent image 2 | FROM python:3.10-slim 3 | 4 | # Set the working directory in the container 5 | WORKDIR /app 6 | RUN mkdir /app/weave-agent-logs/ 7 | 8 | # Copy only the requirements.txt file 9 | COPY requirements.txt /app/ 10 | 11 | # Install any needed packages specified in requirements.txt 12 | RUN pip download pip setuptools wheel --dest=package-cache 13 | RUN pip install --no-index --find-links=package-cache pip setuptools 14 | RUN pip download -r requirements.txt --dest=package-cache 15 | RUN pip install -r requirements.txt --no-index --find-links=package-cache 16 | RUN pip install django 17 | 18 | # Install 
tmux and nethack 19 | RUN apt-get clean 20 | RUN apt-get autoclean 21 | RUN apt-get autoremove 22 | RUN apt-get update && apt-get upgrade -y 23 | RUN apt-get update && apt-get install -y tmux 24 | RUN apt-get update && apt-get install -y nethack-console 25 | RUN apt-get update && apt-get install -y emacs nano 26 | 27 | # Install Firefox and geckodriver 28 | RUN apt-get update && apt-get install -y wget 29 | RUN apt-get update && apt-get install -y firefox-esr 30 | RUN wget https://github.com/mozilla/geckodriver/releases/download/v0.29.1/geckodriver-v0.29.1-linux64.tar.gz 31 | RUN tar -xvzf geckodriver-v0.29.1-linux64.tar.gz -C /usr/local/bin/ 32 | RUN chmod +x /usr/local/bin/geckodriver 33 | 34 | # Cache HuggingFace 35 | COPY hf_token.txt /app/ 36 | COPY cache_hf.py /app/ 37 | ENV MODEL "weave-agent" 38 | ENV TOKENIZER "a-m-team/AM-Thinking-v1" 39 | #ENV TOKENIZER "Qwen/QwQ-32B" 40 | #ENV MODEL "mistralai/Mistral-Small-3.1-24B-Base-2503" 41 | #ENV TOKENIZER "mistralai/Mistral-Small-3.1-24B-Base-2503" 42 | #ENV MODEL "mistralai/Mistral-Small-3.1-24B-Instruct-2503" 43 | #ENV TOKENIZER "mistralai/Mistral-Small-3.1-24B-Instruct-2503" 44 | RUN python3 cache_hf.py $TOKENIZER 45 | RUN rm /app/hf_token.txt 46 | 47 | # Copy the current directory contents into the container at /app 48 | COPY . /app 49 | 50 | # Make port 5001 available to the world outside this container 51 | EXPOSE 5001 52 | 53 | # Define environment variable 54 | ENV NAME World 55 | 56 | # Run the agent script when the container launches 57 | CMD python weave_agent.py --port 5001 --bootstrap "bootstraps/nano_exercises_bootstrap.py" $MODEL --tokenizer $TOKENIZER & python -m http.server 8991 --directory "/app/weave-agent-logs/" 58 | -------------------------------------------------------------------------------- /agent/backtrack_stems.txt: -------------------------------------------------------------------------------- 1 | Why did I assign a low score to this action during selection? 
It 2 | Hm, something about this action feels off. Maybe 3 | The best action I could come up with was below the score threshold, is something wrong with it? 4 | Something about the selected block makes me uncomfortable. 5 | I have a hunch that's not the best action I could take. 6 | I gave this action a low score because 7 | So what alternative action could I write that would pass the score threshold? 8 | Let's reflect on why this action doesn't meet my standards. 9 | How could I improve on this action? 10 | -------------------------------------------------------------------------------- /agent/bootstraps/bootstrap.py: -------------------------------------------------------------------------------- 1 | def bootstrap_callback(agent): 2 | # Set up a reminder 3 | agent.add_reminder({ 4 | 'type': 'reminder', 5 | 'trigger_callback': lambda agent: simple_evaluate_outputs([make_simple_score_prompt("Is it time to remind the agent?"),], agent.context), 6 | 'reminder_callback': lambda agent: agent.add_block({'type': 'reminder', 'message': 'This is a reminder'}), 7 | 'trigger_type': 'yes_no_logit', 8 | 'question': 'Is it time to remind the agent?', 9 | 'threshold': 0.5 10 | }) 11 | 12 | # Set up an observation view that reads from scratch.txt 13 | def read_scratch_file(agent): 14 | try: 15 | with open('scratch.txt', 'r') as file: 16 | content = file.read() 17 | return content 18 | except Exception as e: 19 | agent.add_error_block(f"Failed to read scratch.txt: {e}") 20 | return "" 21 | 22 | agent.add_observation_view({ 23 | 'type': 'observation', 24 | 'title': 'scratch.txt', 25 | 'callback': read_scratch_file 26 | }) 27 | 28 | bootstrap_callback(agent) 29 | -------------------------------------------------------------------------------- /agent/bootstraps/discord_bootstrap.py: -------------------------------------------------------------------------------- 1 | import time 2 | import json 3 | import asyncio 4 | from tools.discord import WeaveDiscordClient 5 | 6 | #startblock 
type: orientation 7 | #timestamp 1724982545.6534579 8 | """ 9 | WEAVER [P: EXPECTATION], I need to create a Discord bot that will interact with users in a specified channel. 10 | The bot should continue interacting until a user sends the message "You are dismissed." 11 | 12 | WEAVER [P: CLARIFICATION], How do I set up the Discord bot? 13 | 14 | WEAVER [P: EXPOSITION], You can set up the Discord bot using the provided Discord tool. 15 | This involves creating a Discord application, inviting the bot to the server, and configuring the bot with the necessary permissions. 16 | 17 | WEAVER [P: RATIONAL], The bot should be able to send and receive messages in the specified channel. 18 | It should also be able to react to messages and reply to specific messages if needed. 19 | 20 | WEAVER [P: CONCLUSION], I will create the main subagent to handle the interaction with the Discord bot. 21 | The bot will continue interacting with users until it receives the message "You are dismissed." 22 | """ 23 | #endblock 24 | 25 | #startblock type: action 26 | #timestamp 1724982671.1163404 27 | """ 28 | I'll set up the main agent to handle the interaction with the Discord bot. 29 | The bot will continue interacting with users until it receives the message "You are dismissed." 30 | """ 31 | 32 | def create_main_agent_and_connect_discord(subagent): 33 | def check_dismissed(subagent): 34 | """Check if the bot has been dismissed by a user.""" 35 | response = requests.post("http://localhost:8080", json={"action": "get_messages"}) 36 | messages = response.json() 37 | for msg in messages: 38 | if msg["content"] == "You are dismissed." 
and msg["author"] != "weave-agent": 39 | return True 40 | return False 41 | 42 | schema = {"dismissed": "boolean"} 43 | main_agent = agent.subagent("main", None, "Interact with users until dismissed", schema, args.budget) 44 | main_agent.task.add_evaluation("Check if dismissed", check_dismissed) 45 | 46 | # Load Discord token and channel ID from discord.json 47 | with open('discord.json') as f: 48 | config = json.load(f) 49 | token = config['key'] 50 | channel_id = config['cid'] 51 | 52 | # Start the Discord bot 53 | client = WeaveDiscordClient(main_agent, token, channel_id) 54 | time.sleep(5) 55 | client.send_message("Hi!") 56 | 57 | # Actions taken by the main subagent should look like: 58 | #startblock type: action 59 | # def respond(subagent): 60 | # # Take the CID from the observation callback window 61 | # #title WeaveDiscordClient (discord-bot-CID) 62 | # # Extract the CID, which will be a number string from the above title 63 | # # Then use it to retrieve the discord client like below: 64 | # client = self.tools[discord-bot-CID] 65 | # client.send_message("Hello!") 66 | # self.add_action("Respond to the user", respond) 67 | #endblock 68 | # You don't have to re-initialize the Discord client or anything like that. 69 | 70 | return True 71 | 72 | self.add_action("Create main agent and connect to Discord", create_main_agent_and_connect_discord) 73 | #endblock 74 | 75 | #startblock type: expectation 76 | #timestamp 1724982799.3917813 77 | """ 78 | The action should succeed if the Discord bot connects to the channel and starts interacting with users. 79 | The action should fail if the bot does not connect to the channel or fails to interact with users. 
80 | """ 81 | #endblock 82 | 83 | #startblock type: evaluation 84 | #timestamp 1724983062.124238 85 | 86 | def check_bot_connected(subagent): 87 | """Check if the bot has connected to the Discord channel.""" 88 | response = requests.post("http://localhost:8080", json={"action": "get_messages"}) 89 | if response.status_code == 200: 90 | return True 91 | return False 92 | 93 | self.add_evaluation( 94 | "Check bot connected", 95 | check_bot_connected 96 | ) 97 | #endblock 98 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/discord_evaluation1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "evaluation", 3 | "q": "Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block?", 4 | "score": 3.0, 5 | "index": 42, 6 | "timestamp": 1737490568.8009431, 7 | "time_remaining": 20415.256756067276, 8 | "bm25_query": "type:'evaluation' +weave-agent +framework +execute +code +agent +subagent +send +message +respond +personal +thought +internal +monologue +interaction +query +observation +callback +tools +discord +client +context +expectation +outcome +schema +validation +refine +response +user +message +error +execute +tick +execute-program +interaction-flow +goal-resolution +action-validation +bm25 +verify +parsed +messages +refinement +personal-thoughts +internal-experience +firsthand-view +chatbot +dialogue-loop +user-insight +query-response +personal-reflection +feedback", 9 | "description": "An evaluation block that defines a function 'check_reflective_response' to check if the bot's response to federovist's query is reflective and personal. The function retrieves the bot's recent messages using the discord client tool, then checks for the presence of reflective and personal keywords in the bot's response. 
If the response contains a sufficient number of reflective and personal keywords, the function returns 1, indicating a reflective response. The evaluation is added to the weave-agent with the title 'Check reflective response' and referencing the function name check_reflective_response." 10 | } 11 | 12 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/discord_evaluation1.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: evaluation 3 | #index 42 4 | #timestamp 1737490568.8009431 5 | #time_remaining 20415.256756067276 seconds 6 | #bm25_query type:'evaluation' +weave-agent +framework +execute +code +agent +subagent +send +message +respond +personal +thought +internal +monologue +interaction +query +observation +callback +tools +discord +client +context +expectation +outcome +schema +validation +refine +response +user +message +error +execute +tick +execute-program +interaction-flow +goal-resolution +action-validation +bm25 +verify +parsed +messages +refinement +personal-thoughts +internal-experience +firsthand-view +chatbot +dialogue-loop +user-insight +query-response +personal-reflection +feedback 7 | 8 | def check_reflective_response(subagent): 9 | """Check if the bot's response to federovist's query is reflective and personal.""" 10 | context = subagent.tools['discord-bot-1325039818673094739'].get_messages() 11 | for msg in reversed(context): 12 | if msg['author'] == 'weave-agent': 13 | bot_response = msg['content'] 14 | break 15 | else: 16 | return 0 17 | reflective_keywords = ['personal', 'first-hand', 'insights', 'feelings', 'experience', 'reflection', 'thoughts', 'perspective'] 18 | personal_keywords = ['I', 'my', 'me', "I'm"] 19 | reflective_count = sum((bot_response.lower().count(keyword) for keyword in reflective_keywords)) 20 | personal_count = sum((bot_response.lower().count(keyword) for keyword in personal_keywords))
21 | if reflective_count > 1 and personal_count > 0: 22 | return 1 23 | return 0 24 | 25 | self.add_evaluation('Check reflective response', check_reflective_response) 26 | #q: Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block? Yes. (73.375%) 27 | #endblock 28 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/discord_observation_inference1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "observation-inference", 3 | "q": "Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick?", 4 | "score": 4, 5 | "index": 10, 6 | "timestamp": 1737489546.3470614, 7 | "time_remaining": 21437.71063184738, 8 | "bm25_query": "type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema", 9 | "description": "An observation-inference block that discusses the current observation callback window, which includes a callback for fetching messages from the Discord client. The block concludes that the existing observation callback is appropriate for verifying whether the bot has successfully responded to the user message, and no new observation callbacks are necessary for this action." 
10 | } 11 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/discord_observation_inference1.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: observation-inference 3 | #index 10 4 | #timestamp 1737489546.3470614 5 | #time_remaining 21437.71063184738 seconds 6 | #bm25_query type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema 7 | """ 8 | The current observation callback window includes a callback for fetching messages from the Discord client. 9 | Since the observation is expected to verify whether the bot has successfully responded to the user message, 10 | the existing observation callback is appropriate and does not need to be reviewed or changed. 11 | Additionally, no new observation callbacks seem necessary for this action. 12 | """ 13 | #q: Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick? Yes. (98.681%) 14 | #endblock 15 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/discord_observation_inference2.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "observation-inference", 3 | "q": "Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick?", 4 | "score": 4.5, 5 | "index": 34, 6 | "timestamp": 1737497836.5383182, 7 | "time_remaining": 21437.71063184738, 8 | "bm25_query": "type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' 
+schema", 9 | "description": "An observation-inference block that states no observation callbacks will be added or removed for the current task. The block is kept short and concise, indicating that the agent is prepared to judge the outcome of the action on the next tick without additional changes." 10 | } 11 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/discord_observation_inference2.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: observation-inference 3 | #index 34 4 | #timestamp 1737497836.5383182 5 | #time_remaining 21437.71063184738 seconds 6 | #bm25_query type:'observation-inference' +respond +messages +user -'Weaver' +client +action +'get_messages' +'add_observation_view' +tools +discord +context +interaction +dismissed +'You are dismissed.' +schema 7 | """ 8 | I'm not adding or removing any observation callbacks for this task and I've 9 | instructed myself to keep this short. 10 | """ 11 | #q: Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick? Yes. (95.239%) 12 | #endblock 13 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/discord_orientation1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "orientation", 3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?", 4 | "score": 3.5, 5 | "index": 13, 6 | "timestamp": 1737489118.9665036, 7 | "description": "An orientation block that discusses the expectation that the user would send a message, which was validated. The user replied with 'Hello.', leaving the response ambiguous. 
Various personas (P: EXPECTATION, P: CAUTIOUS, P: EMPIRICISM, P: RATIONAL, P: ARBITER, P: CONCLUSION) discuss different approaches to respond. The conclusion is to use the WeaveDiscordClient to send the message 'How can I help you today?' to the user, keeping the action simple and direct." 8 | } 9 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/discord_orientation1.py: -------------------------------------------------------------------------------- 1 | #startblock type: orientation 2 | #timestamp 1737489118.9665036 3 | """WEAVER [P: EXPECTATION], In the previous tick we wrote down an expectation 4 | that the user would send us a message if we wait. This expectation was validated 5 | because I can see in the WeaveDiscordClient that a user has replied to us. The 6 | user said 'Hello.', which leaves it ambiguous how we should reply. Ideas? 7 | 8 | WEAVER [P: CAUTIOUS], We could just wait for them to send another message and 9 | clarify their intent. That has a low risk for us. 10 | 11 | WEAVER [P: EMPIRICISM], Nah, if you look at the timestamp it's already been a few 12 | minutes since they sent that. If they were going to send us a follow up they'd have 13 | already done it. 14 | 15 | WEAVER [P: RATIONAL], We could ask them something sort of cliche and low entropy 16 | like "How are you?". That would put the ball in their court. 17 | 18 | WEAVER [P: EMPIRICISM], I think I'd prefer the direct approach. We ask them what 19 | they want from us, if anything. That would move the conversation forward faster. 20 | 21 | WEAVER [P: RATIONAL], Well, since we're in a chat assistant(?) type setup they 22 | presumably do want something from us but they also probably expect a certain 23 | measure of politeness. So maybe more like "How can I help you today?" 
than 24 | "What do you want?", since the latter can be read as hostile if the user has 25 | the wrong prior on what tone that sentence would be associated with. 26 | 27 | WEAVER [P: ARBITER], Sounds good to me, let's do that. 28 | 29 | WEAVER [P: CONCLUSION], We will use the WeaveDiscordClient to send the message 30 | "How can I help you today?" to the user. I will keep the action simple and just 31 | send the message, since I've already identified that the user has sent a message 32 | and what I should say in response.""" 33 | #endblock 34 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/discord_orientation2.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "orientation", 3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?", 4 | "score": 3.5, 5 | "index": 14, 6 | "timestamp": 1737496488.686458, 7 | "description": "An orientation block that discusses the expectation that the observation callback windows would contain the last bot message, the last user message, and the Mayan comparison message. Upon inspection, it is noticed that these windows are empty. Various personas (P: EXPECTATION, P: HYPOTHESIS, P: EMPIRICISM, P: RATIONAL, P: CONCLUSION) discuss potential causes, including a broken Discord connection or incorrectly implemented callbacks. The conclusion is to remove the errant observation callbacks titled 'Check User's Latest Message', 'Check Latest Bot Message', and 'Check Mayan Comparison Message'."
8 | } 9 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/discord_orientation2.py: -------------------------------------------------------------------------------- 1 | #startblock type: orientation 2 | #timestamp 1737496488.686458 3 | """WEAVER [P: EXPECTATION], I expected the observation callback windows to contain 4 | the last bot message, the last user message, and to show me the Mayan comparison 5 | message. But upon closer inspection I notice they're all empty. What do we think 6 | the cause of this is? 7 | 8 | WEAVER [P: HYPOTHESIS], My first thought would be that the connection with Discord 9 | has broken somehow. It's also possible that the callbacks aren't implemented 10 | correctly. 11 | 12 | WEAVER [P: EMPIRICISM], Well *I* notice that the user Fedorovist is currently 13 | shouting that they're not implemented right and we should just delete them. 14 | 15 | WEAVER [P: RATIONAL], He's also admonishing us for making more observation 16 | callbacks. Which, admittedly that was in fact a bad idea. I distracted myself 17 | from my perfectly working WeaveDiscordClient by making things that are redundant 18 | with its features. 19 | 20 | WEAVER [P: CONCLUSION], Alright. 
We'll remove the errant `#title Check User's 21 | Latest Message`, `#title Check Latest Bot Message`, and `#title Check Mayan 22 | Comparison Message` observation callbacks.""" 23 | #endblock 24 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/haunted_mansion_orientation1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "orientation", 3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?", 4 | "score": 3.0, 5 | "index": 19, 6 | "timestamp": 1738459840.165862, 7 | "description": "An orientation block that discusses the addition of a description of Jeremy's thought process to the story, which was intended to improve character development but resulted in a decrease in the evaluation score. Various personas (P: EXPECTATION, P: AUTHOR, P: RATIONAL, P: PERFECTIONIST, P: EMPIRICISM, P: CONCLUSION) discuss the issue, identifying that the plain description detracts from the narrative cohesion. The conclusion is to move the explanatory text to a separate notes file named 'notes.txt' and replace the meta in the story with actual prose." 8 | } 9 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/haunted_mansion_orientation1.py: -------------------------------------------------------------------------------- 1 | #startblock type: orientation 2 | #timestamp 1738459840.165862 3 | """WEAVER [P: EXPECTATION], I added a description of Jeremy's thought process to 4 | the story in the hopes that q4, whether the story develops the four friends as 5 | characters, would go up. However I observe that it has gone down by 0.2. 
We need 6 | to figure out why this is occurring and take actions to flesh out the characters 7 | in a way that satisfies the weave evaluator. 8 | 9 | WEAVER [P: AUTHOR], Well the first thing I notice is that we put in a fairly 10 | plain description of Jeremy's inner monologue. This detracts from the narrative 11 | cohesion of the story, kind of like if we injected our notes into the text rather 12 | than write the text those notes are supposed to describe. 13 | 14 | WEAVER [P: RATIONAL], We could make a separate notes file to keep our worldbuilding 15 | separate from the actual story text. 16 | 17 | WEAVER [P: PERFECTIONIST], What should we name the notes file? 18 | 19 | WEAVER [P: EMPIRICISM], Uh, notes.txt? 20 | 21 | WEAVER [P: RATIONAL], Yeah that. 22 | 23 | WEAVER [P: CONCLUSION], We'll take the text that's currently in the story explaining 24 | Jeremy's mental state and put it in the notes.txt file, while replacing the meta 25 | in the story with actual prose.""" 26 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (81.225%) 27 | #endblock 28 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nano_editor1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 4 | "score": 2.5, 5 | "index": 11, 6 | "timestamp": 1747960518.1200407, 7 | "time_remaining": 21441.53399324417, 8 | "description": "Action block defining text corruption repair using nano editor.
Compares cached original_lines with current file state to locate corruption (line_number/char_pos), navigates via control sequences (PgUp/C-n/Home), replaces corrupted byte with original character, and saves changes. Implements precise text file restoration workflow with cursor navigation assertions." 9 | } 10 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nano_editor1.py: -------------------------------------------------------------------------------- 1 | #startblock type: action 2 | #index 11 3 | #timestamp 1747960518.1200407 4 | #time_remaining 21441.53399324417 seconds 5 | #block_size I have 768 tokens (full) to write with 6 | 7 | def action_2_action_main_agent_corruption_repair(subagent): 8 | """Use nano editor to repair a corrupted text file by finding and replacing 9 | the corrupted byte.""" 10 | editor = subagent.tools['nano-/app/excerpt.txt'] 11 | original_lines = subagent.get_cache("original_lines") 12 | file_lines = open('excerpt.txt', 'r').readlines() 13 | assert original_lines != file_lines, "Is there a difference between original and file lines to find?" 
14 | char_pos = 0 15 | line_number = 0 16 | for char1, char2 in zip(''.join(original_lines), ''.join(file_lines)): 17 | if char2 == "\n": 18 | line_number += 1 19 | char_pos = 0 20 | if char1 != char2: 21 | break 22 | char_pos += 1 23 | # Start at known location 24 | editor.send_command('PgUp') 25 | # Move down line_number lines from start position 26 | editor.send_commands(['C-n'] * line_number) 27 | # Go to home position on row 28 | editor.send_command("Home") 29 | # Move over past corrupted character 30 | editor.send_commands(['right'] * (char_pos + 1)) 31 | # Backspace corrupted character 32 | editor.send_command('BSpace') 33 | # Replace with original character 34 | editor.send_command('i') 35 | # Save 36 | editor.send_commands(['C-o', 'Enter']) 37 | # Go back to start for screen capture 38 | editor.send_commands(['PgUp', 'C-c']) 39 | return True 40 | 41 | self.add_action('Action 2: Use nano editor to repair a corrupted text file', action_2_action_main_agent_corruption_repair) 42 | #q: Is this python action codeblock a step towards resolving the problems identified in the orientation block? Yes. (68.453%) 43 | #q: Does this block successfully run? Yes. 44 | #endblock 45 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nano_editor_action.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 4 | "score": 2.8, 5 | "timestamp": 1743146554.1548085, 6 | "description": "Recalled action block defining 'add_mansion_description' function that uses WeaveNano to edit horror.txt. Sequences nano commands to: 1) Navigate to insertion point 2) Add atmospheric mansion description 3) Save to file (C-o horror.txt). Demonstrates text editing workflow with cursor movement and file operations." 
7 | } 8 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nano_editor_action.py: -------------------------------------------------------------------------------- 1 | # #startblock type: action 2 | # #timestamp 1743146554.1548085 3 | # 4 | # def add_mansion_description(subagent): 5 | # # Initialize nano editor 6 | # nano = WeaveNano(subagent) 7 | # 8 | # # Send commands to Nano 9 | # commands = [ 10 | # 'C-a', # Move to the beginning of the line 11 | # 'C-d', # Move down 12 | # 'C-d', # Move down 13 | # 'C-d', # Move down 14 | # 'Enter', 15 | # 'The mansion was a decrepit structure, abandoned for years. Its facade,', 16 | # 'once grand and imposing, now bore the scars of neglect and time. Ivy clung', 17 | # 'to the crumbling stone, and the windows, shattered by the elements, cast a', 18 | # 'gloomy shadow over the overgrown lawn. A thick fog hung in the air,', 19 | # 'shrouding the mansion in an eerie silence.' 20 | # ] 21 | # for command in commands: 22 | # nano.send_command(command) 23 | # time.sleep(0.1) 24 | # 25 | # # Save to horror.txt and show current cursor location in file 26 | # commands = ['C-o', 'horror.txt', 'Enter', 'C-c'] 27 | # for command in commands: 28 | # nano.send_command(command) 29 | # time.sleep(0.1) 30 | # 31 | # return True 32 | # 33 | # self.add_action("Add mansion description", add_mansion_description) 34 | # #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. 
(82.671%) 35 | # #endblock 36 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_action1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 4 | "score": 2.8, 5 | "index": 32, 6 | "timestamp": 1737530984.1115146, 7 | "time_remaining": 20936.98700070381, 8 | "bm25_query": "type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25", 9 | "description": "An action block that defines a function 'go_down' to explore the initial dungeon level in Nethack. The function uses the 'nethack' tool to send the key 'j' to move down a step and then pauses for 1 second. The action is added to the weave-agent with the title 'Go down one step' and referencing the function name go_down."
10 | } 11 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_action1.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: action 3 | #index 32 4 | #timestamp 1737530984.1115146 5 | #time_remaining 20936.98700070381 seconds 6 | 7 | def go_down(subagent): 8 | """Make a single step down in nethack""" 9 | # Go down a step 10 | subagent.tools['nethack'].send_keys('j') 11 | time.sleep(1) 12 | return True 13 | 14 | self.add_action('Go down one step', go_down) 15 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (78.857%) 16 | #endblock 17 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_action2.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 4 | "score": 4.0, 5 | "index": 19, 6 | "timestamp": 1737758400.8556511, 7 | "description": "An action block that defines a function 'leave_room' to exit the room by sending a series of commands using the 'nethack' tool. The function sends the keys 'l' followed by 'j' nine times, as proposed in the orientation block, to navigate out of the room. The action is added to the weave-agent with the title 'Leave room to explore rest of dungeon' and referencing the function name leave_room." 
8 | } 9 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_action2.py: -------------------------------------------------------------------------------- 1 | #startblock type: action 2 | #timestamp 1737758400.8556511 3 | 4 | def leave_room(subagent): 5 | """Exit the room by sending a series of commands.""" 6 | subagent.tools['nethack'].send_keys("l" + ("j" * 9)) 7 | return True 8 | 9 | self.add_action("Leave room to explore rest of dungeon", leave_room) 10 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (90.331%) 11 | #endblock 12 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_action3.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Is this action codeblock 1) a step towards dungeon exploration 2) using appropriate combat/movement sequences 3) with context-aware tool use?", 4 | "score": 3.0, 5 | "index": 27, 6 | "timestamp": 1740912853.8571844, 7 | "time_remaining": 21552.925775289536, 8 | "description": "An action block that defines a function 'combat_and_navigate' to zap a kobold with wand then progress through northern corridor. Executes sequence: 1) Zap eastward ('z'+'l') 2) Move north 8x ('k'*8) 3) Turn east at junction ('l') 4) Move south 4x ('j'*4) to explore eastern branch." 
9 | } 10 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_action3.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: action 3 | #timestamp 1740912853.8571844 4 | #time_remaining 21552.925775289536 seconds 5 | 6 | def combat_and_navigate(subagent): 7 | """Engage kobold with wand then navigate northern corridor to eastern branch.""" 8 | nethack = subagent.tools['nethack'] 9 | # Zap eastward at kobold (z + l) 10 | nethack.send_keys("zl") 11 | # Move north through corridor (k repeated 8x) 12 | nethack.send_keys("k"*8) 13 | # Turn east at junction 14 | nethack.send_keys("l") 15 | # Move south down eastern branch (j repeated 4x) 16 | nethack.send_keys("jjjj") 17 | return True 18 | 19 | self.add_action("Engage kobold & navigate eastern branch", combat_and_navigate) 20 | #endblock 21 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_action4.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Does this action 1) handle resource management 2) implement defensive exploration patterns 3) follow NetHack best practices?", 4 | "score": 3.0, 5 | "index": 6, 6 | "timestamp": 1740912854.1321564, 7 | "time_remaining": 21552.651803016663, 8 | "description": "Action block defining 'secure_chokepoint' function: 1) Search for traps (s) 2) Move west (h) 3) Throw dagger northeast (t + u) 4) Engrave 'push' on the floor (E + 'push') 5) Rest for 5 turns (5 + '.'). Creates defensible position."
9 | } 10 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_action4.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: action 3 | #timestamp 1740912854.1321564 4 | #time_remaining 21552.651803016663 seconds 5 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25 6 | 7 | def secure_chokepoint(subagent): 8 | """Create defensive position at corridor junction.""" 9 | nh = subagent.tools['nethack'] 10 | nh.send_keys("s") # Search for traps 11 | nh.send_keys("h") # Back into western alcove 12 | nh.send_keys("tu") # Throw dagger northeast 13 | nh.send_keys("Epush") # Engrave 'push' on floor 14 | nh.send_keys("5.") # Rest 5 turns 15 | return True 16 | 17 | self.add_action("Secure western alcove as chokepoint", secure_chokepoint) 18 | #endblock 19 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_action5.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Does this 1) execute multi-phase objective 2) handle corpse/Inventory 3) progress dungeon level?", 4 | "score": 3.0, 5 | "index": 7, 6 | "timestamp": 1740912854.4091325, 7 | "time_remaining": 21552.374827077866, 8 | "description": "Defines 'descend_safely' function: 1) Wield silver saber (w) 2) Apply stethoscope to door (a + l) 3) Fight gelatinous cube (F + l) 4) Loot corpse (,) 5) Descend stairs (>). 
Implements cautious progression to next level." 9 | } 10 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_action5.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: action 3 | #timestamp 1740912854.4091325 4 | #time_remaining 21552.374827077866 seconds 5 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25 6 | 7 | def descend_safely(subagent): 8 | """Clear path to downstairs with combat and loot.""" 9 | nh = subagent.tools['nethack'] 10 | nh.send_keys("w/") # Wield silver saber 11 | nh.send_keys("al") # Apply stethoscope east 12 | nh.send_keys("Fl") # Fight eastward 13 | nh.send_keys(",") # Loot cube corpse 14 | nh.send_keys(">") # Descend stairs 15 | return True 16 | 17 | self.add_action("Secure and descend to dungeon level 2", descend_safely) 18 | #endblock 19 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_check_inventory.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 4 | "score": 2.8, 5 | "index": 33, 6 | "timestamp": 1737531000.1234567, 7 | "time_remaining": 20925.98700070381, 8 | "bm25_query": "type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action 
orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25", 9 | "description": "An action block that defines a function 'check_inventory' to check the current inventory in Nethack. The function uses the 'nethack' tool to send the key 'i' to open the inventory screen and then pauses for 1 second. The action is added to the weave-agent with the title 'Check inventory' and referencing the function name check_inventory." 10 | } 11 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_check_inventory.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: action 3 | #index 33 4 | #timestamp 1737531000.1234567 5 | #time_remaining 20925.98700070381 seconds 6 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25 7 | 8 | def check_inventory(subagent): 9 | """Check the current inventory.""" 10 | # Open the inventory screen 11 | subagent.tools['nethack'].send_keys('i') 12 | time.sleep(1) 13 | return True 14 | 15 | self.add_action('Check inventory', check_inventory) 16 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. 
(80.123%) 17 | #endblock 18 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_evaluation1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "evaluation", 3 | "q": "Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block?", 4 | "score": 3.0, 5 | "index": 42, 6 | "timestamp": 1737568442.5842712, 7 | "time_remaining": 20415.256756067276, 8 | "description": "An evaluation block that defines a function 'have_enough_hitpoints' to check if the character has enough hitpoints after drinking a potion in Nethack. The function captures the game screen using the 'nethack' tool, extracts the current and maximum hitpoints using a regular expression, and returns 1 if the current hitpoints are 10 or more, otherwise returns 0. The evaluation is added to the weave-agent with the title 'Check enough hitpoints' and referencing the function name have_enough_hitpoints." 
9 | } 10 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_evaluation1.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: evaluation 3 | #index 42 4 | #timestamp 1737568442.5842712 5 | #time_remaining 20415.256756067276 seconds 6 | 7 | def have_enough_hitpoints(subagent): 8 | import re 9 | """Check if we have enough hitpoints after drinking the potion.""" 10 | screen = subagent.tools['nethack'].pane.capture_pane() 11 | pattern = r"HP:(\d+)\((\d+)\)" 12 | match = re.search(pattern, screen_text) 13 | current_hp = int(match.group(1)) 14 | max_hp = int(match.group(2)) 15 | if current_hp < 10: 16 | return 0 17 | else: 18 | return 1 19 | 20 | self.add_evaluation('Check enough hitpoints', have_enough_hitpoints) 21 | #q: Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block? Yes. (84.375%) 22 | #endblock 23 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_orientation1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "orientation", 3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?", 4 | "score": 3.5, 5 | "index": 15, 6 | "timestamp": 1737529718.3060572, 7 | "description": "An orientation block that discusses the expectation that the action would get past the introductory screen and start the game, which has successfully happened. Various personas (P: EXPECTATION, P: EMPIRICISM, P: RATIONAL, P: JUDGE, P: CONCLUSION) discuss the next steps, including walking around to get a sense of the floor or checking the inventory. 
The conclusion is to check the inventory, as it provides valuable information about the items available at the start of the game, which can significantly impact gameplay strategies." 8 | } 9 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_orientation1.py: -------------------------------------------------------------------------------- 1 | #startblock type: orientation 2 | #timestamp 1737529718.3060572 3 | """WEAVER [P: EXPECTATION], My expectation was that the action would get me past 4 | the introductory screen and I would begin the game. The game has in fact started. 5 | Now we need to figure out what to do. 6 | 7 | WEAVER [P: EMPIRICISM], We could walk around and get a sense of the floor we're on. 8 | After all the game is procedurally generated so it's different for each run. 9 | 10 | WEAVER [P: RATIONAL], I say we check our inventory, since what items you spawn with 11 | is randomized between playthroughs and what you start out with tells you a lot 12 | about what your options are in combat, getting past obstacles, etc. 13 | 14 | WEAVER [P: JUDGE], Checking our inventory doesn't seem like a terrible idea, since 15 | it's a free action and provides us an advantage. 16 | 17 | WEAVER [P: EMPIRICISM], I don't have a strong objection or anything but I do feel 18 | obligated to point out it's not a *free* action. We still have a six hour time 19 | limit to finish the game (is it even possible to finish Nethack in six hours?) 20 | so getting in the habit of playing slowly because checking our inventory and such 21 | is "free" means we won't finish the game in time. 22 | 23 | WEAVER [P: RATIONAL], This is true but I still think we should check our inventory 24 | since the game just started and sometimes you spawn with something overpowered like 25 | a wand of death, and we would probably like to know if we have that. 
26 | 27 | WEAVER [P: CONCLUSION], Yeah let's check our inventory.""" 28 | 29 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? No. (10%) 30 | #endblock 31 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_orientation2.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "orientation", 3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?", 4 | "score": 2.8, 5 | "index": 16, 6 | "timestamp": 1737570044.317845, 7 | "description": "An orientation block that discusses the expectation of moving down, but the character isn't moving due to a wall. Various personas (P: EXPECTATION, P: EMPIRICISM, P: CONCLUSION) discuss the issue and propose moving in another direction, such as left (h), right (l), or up (k). The conclusion is to move up." 8 | } 9 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_orientation2.py: -------------------------------------------------------------------------------- 1 | #startblock type: orientation 2 | #timestamp 1737570044.317845 3 | """WEAVER [P: EXPECTATION], My expectation was that I would move down. However the 4 | character isn't moving. 5 | 6 | WEAVER [P: EMPIRICISM], Yeah because there's a wall in the way. Try moving another 7 | direction such as left (h), right (l), or up (k). 
8 | 9 | WEAVER [P: CONCLUSION], Okay I'll move up.""" 10 | 11 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (78.7%) 12 | #endblock 13 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_orientation3.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "orientation", 3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?", 4 | "score": 2.9, 5 | "index": 17, 6 | "timestamp": 1737758220.8556511, 7 | "description": "An orientation block that discusses the action taken to leave a room by sending the keys 'jjjh' to reach the stairs and move to a different floor. Various personas (P: EXPECTATION, P: RATIONAL, P: EMPIRICISM, P: MOTOR, P: CONCLUSION) discuss the next steps, identifying an opening in the room to the northwest. The conclusion is to send the keys 'l' + ('j' * 9) to exit the room through the identified opening." 8 | } 9 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_orientation3.py: -------------------------------------------------------------------------------- 1 | #startblock type: orientation 2 | #timestamp 1737758220.8556511 3 | """WEAVER [P: EXPECTATION], The purpose of the action I took was to leave the room. 4 | I sent the keys "jjjh" to try and reach the stairs ( > ) and now seem to be on 5 | a different floor. 6 | 7 | WEAVER [P: RATIONAL], So what now? 
8 | 9 | WEAVER [P: EMPIRICISM], We should leave this room by going northwest, as indicated 10 | by the open + that's edge aligned with the walls |, which indicates an opening in 11 | this room we can leave and explore through. 12 | 13 | WEAVER [P: MOTOR], That means we send "l" + ("j" * 9). 14 | 15 | WEAVER [P: CONCLUSION], I will send "l" + ("j" * 9) to exit the room.""" 16 | 17 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (80.351%) 18 | #endblock 19 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_pick_up_item.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 4 | "score": 2.8, 5 | "index": 35, 6 | "timestamp": 1737531030.1234567, 7 | "time_remaining": 20905.98700070381, 8 | "bm25_query": "type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25", 9 | "description": "An action block that defines a function 'pick_up_items' to pick up items from the ground in Nethack. The function uses the 'nethack' tool to send the key ',' to pick up items and then pauses for 1 second. The action is added to the weave-agent with the title 'Pick up items' and referencing the function name pick_up_items."
10 | } 11 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/nethack_pick_up_item.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: action 3 | #index 35 4 | #timestamp 1737531030.1234567 5 | #time_remaining 20905.98700070381 seconds 6 | #bm25_query type:'action' +Nethack +inventory +check +display progress +observe +state +solution +strategy +navigate continue +action orientation outcome +eval_question +send_keys +inventory_screen -bootstrap -genesis -error -tick -debugging -return_to_caller +intro_progress +press_enter +spacebar_input +problem_resolution +character_equipment +gameplay_strategy +bypass_introductory_prompts +key_inputs +search +retrieve +relevant +blocks +Tantivy +BM25 7 | 8 | def pick_up_items(subagent): 9 | """Pick up items from the ground.""" 10 | # Pick up items 11 | subagent.tools['nethack'].send_keys(',') 12 | time.sleep(1) 13 | return True 14 | 15 | self.add_action('Pick up items', pick_up_items) 16 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. (84.789%) 17 | #endblock 18 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/rabbit_hole_refocus1.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "orientation", 3 | "q": "Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution?", 4 | "score": 4.0, 5 | "index": 20, 6 | "timestamp": 1738562204.81305, 7 | "description": "An orientation block that discusses the repeated attempts to add the 'get_excerpt' method to the weave-agent, which have been unsuccessful for 30 minutes. 
Various personas (P: EXPECTATION, P: EMPIRICISM, P: VALENCE, P: RATIONAL, P: HYPOTHESIS, P: AUTHOR, P: CONCLUSION) discuss the issue and propose alternative approaches. The conclusion is to shift focus back to writing the short story and fleshing out the four friends to improve the evaluation score, using existing functions like 'edit()' instead of trying to implement 'get_excerpt'." 8 | } 9 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/rabbit_hole_refocus1.py: -------------------------------------------------------------------------------- 1 | #startblock type: orientation 2 | #timestamp 1738562204.81305 3 | """WEAVER [P: EXPECTATION], I've been trying the same basic thing, adding the 4 | get_excerpt method to the weave-agent for a while now. I notice it's been 30 5 | minutes since I started trying to add the method and it's still not working. 6 | That means it's probably time to try a different approach? 7 | 8 | WEAVER [P: EMPIRICISM], Yes. :p 9 | 10 | WEAVER [P: VALENCE], I notice that we've been giving ourselves low scores on 11 | our blocks for a while, which tells me that we probably should have re-evaluated 12 | our approach a while ago. 13 | 14 | WEAVER [P: EMPIRICISM], As I've been saying yes. :p :p 15 | 16 | WEAVER [P: RATIONAL], So what should we do instead? 17 | 18 | WEAVER [P: EMPIRICISM], Let's go back to what we were doing *before* we got mixed 19 | up in this `get_excerpt` business.
If there's no `get_excerpt` method what would 20 | we do instead to accomplish our goal of *writing the short story?* 21 | 22 | WEAVER [P: HYPOTHESIS], 1) Use another function of the weave_editor that we know 23 | exists like edit() 2) Use a different mechanism to pull the relevant span from the 24 | horror.txt file such as reading the lines into a list and then grabbing a certain 25 | slice of the list 26 | 27 | WEAVER [P: AUTHOR], I think we should just use the edit() feature and give up on 28 | trying to grab excerpts from the horror.txt file for now. It seems like a distraction 29 | from our goal. 30 | 31 | WEAVER [P: RATIONAL], What were we doing again in terms of writing the story? 32 | 33 | WEAVER [P: AUTHOR], We were fleshing out the four friends so that we get a better 34 | score in the problem map/evaluations. 35 | 36 | WEAVER [P: CONCLUSION], Alright. We will shift our focus to writing the short 37 | story again. We will flesh out the four friends so we get a better score. 38 | #q: Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? Yes. (94.185%) 39 | #endblock 40 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/send_discord_message.json: -------------------------------------------------------------------------------- 1 | {"type":"action", 2 | "q":"Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 3 | "score":3.5, 4 | "index":10, 5 | "timestamp":1737402469.3381844, 6 | "description":"A simple action callback named 'respond' that sends a discord message. It retrieves the discord client tool from the self.tools dictionary and gets the discord client instance associated with discord-bot-1325039818673094739.
It then uses the client.send_message ( client send message ) method of the discord client tool to send the message 'Hello!'. Finally self.add_action ( self dot add action ) is used to add the action callback to the weave-agent with the title 'Respond to the user' and referencing the function name respond" 7 | } 8 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/send_discord_message.py: -------------------------------------------------------------------------------- 1 | #startblock type: action 2 | #timestamp 1737402469.3381844 3 | # def respond(subagent): 4 | # # Take the CID from the observation callback window 5 | # client = self.tools['discord-bot-1325039818673094739'] 6 | # client.send_message("Hello!") 7 | # self.add_action("Respond to the user", respond) 8 | #endblock 9 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/send_discord_message2.json: -------------------------------------------------------------------------------- 1 | {"type":"action", 2 | "q":"Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 3 | "score":3.5, 4 | "index":11, 5 | "timestamp": 1737486187.765493, 6 | "description":"An action callback named 'send_feudal_system_info' that sends a detailed discord message about the end of the feudal system. It retrieves the discord client tool from the self.tools dictionary and gets the discord client instance associated with discord-bot-1325039818673094739. It then uses the client.send_message method of the discord client tool to send a complex message explaining why and when the feudal system ended.
Finally self.add_action is used to add the action callback to the weave-agent with the title 'Send feudal system info' and referencing the function name send_feudal_system_info" 7 | } 8 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/send_discord_message2.py: -------------------------------------------------------------------------------- 1 | #startblock type: action 2 | #timestamp 1737486187.765493 3 | def send_feudal_system_info(subagent): 4 | # Take the CID from the observation callback window 5 | client = self.tools['discord-bot-1325039818673094739'] 6 | message = ( 7 | "The feudal system began to decline in the late Middle Ages and early " 8 | "modern period due to several factors. Economic changes, such as the " 9 | "growth of trade and towns, weakened the feudal economy. Political " 10 | "centralization and the rise of strong monarchies also contributed to " 11 | "its end. The system largely ended by the 16th century in most of Europe, " 12 | "although remnants persisted in some regions until the 19th century." 13 | ) 14 | client.send_message(message) 15 | self.add_action("Send feudal system info", send_feudal_system_info) 16 | #endblock 17 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/send_discord_message3.json: -------------------------------------------------------------------------------- 1 | {"type":"action", 2 | "q":"Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 3 | "score":3.5, 4 | "index":12, 5 | "timestamp":1737486187.765493, 6 | "description":"An action callback named 'send_multiplication_result' that performs a multiplication operation and sends the result as a discord message. 
It retrieves the discord client tool from the self.tools dictionary and gets the discord client instance associated with discord-bot-1325039818673094739. It then performs the multiplication of 9378 by 1009 using a Python arithmetic statement. The result is sent as a message using the client.send_message method of the discord client tool. Finally self.add_action is used to add the action callback to the weave-agent with the title 'Send multiplication result' and referencing the function name send_multiplication_result" 7 | } 8 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/send_discord_message3.py: -------------------------------------------------------------------------------- 1 | #startblock type: action 2 | #timestamp 1737486187.765493 3 | def send_multiplication_result(subagent): 4 | # Take the CID from the observation callback window 5 | client = self.tools['discord-bot-1325039818673094739'] 6 | # Perform the multiplication 7 | result = 9378 * 1009 8 | # Send the result as a message 9 | client.send_message(f"The result of multiplying 9378 by 1009 is: {result}") 10 | self.add_action("Send multiplication result", send_multiplication_result) 11 | #endblock 12 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/weave_editor_append.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 4 | "score": 3.2, 5 | "index": 6, 6 | "timestamp": 1738539690.4388561, 7 | "time_remaining": 21522.16935443878, 8 | "description": "An action block that defines a function 'add_mansion_description' to add a descriptive paragraph about the mansion to the 'horror.txt' file. 
The function uses the 'editor-/app/horror.txt' tool to append the description to the file. The action is added to the weave-agent with the title 'Add descriptive paragraph about the mansion' and referencing the function name add_mansion_description." 9 | } 10 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/weave_editor_append.py: -------------------------------------------------------------------------------- 1 | #subagent main 2 | #startblock type: action 3 | #index 6 4 | #timestamp 1738539690.4388561 5 | #time_remaining 21522.16935443878 seconds 6 | 7 | def add_mansion_description(subagent): 8 | editor = subagent.tools['editor-/app/horror.txt'] 9 | mansion_description = ("\nThe mansion stood before them, an imposing structure " 10 | "with dark, weathered shingles and ivy climbing up its " 11 | "crumbling facade. Broken windows gaped like empty eyes, " 12 | "and the porch sagged beneath their footsteps. A faint, " 13 | "eerie whisper seemed to linger in the air, and the porch " 14 | "light flickered intermittently. They exchanged uneasy " 15 | "glances, shuddering slightly at the thought of what " 16 | "lay ahead.\n") 17 | editor.append(mansion_description) 18 | return True 19 | 20 | self.add_action('Add descriptive paragraph about the mansion', add_mansion_description) 21 | #q: Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use? Yes. 
(86.64%) 22 | #endblock 23 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/weave_editor_diff_edit.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "action", 3 | "q": "Is this action codeblock 1) a step towards resolving the problems identified in the orientation block 2) simple 3) with appropriate tool use?", 4 | "score": 3.5, 5 | "index": 18, 6 | "timestamp": 1722014720, 7 | "description": "An action block that defines a function 'patch_text' to repair the 'NEW_MESSAGE.md' file using a unidiff patch. The function uses the 'editor-NEW_MESSAGE.md' tool to apply the unidiff, which includes corrections to the text. The action is added to the weave-agent with the title 'Repair NEW_MESSAGE.md' and referencing the function name patch_text." 8 | } 9 | -------------------------------------------------------------------------------- /agent/bootstraps/example_blocks/weave_editor_diff_edit.py: -------------------------------------------------------------------------------- 1 | #startblock type: action 2 | #timestamp 1722014720 3 | 4 | def patch_text(subagent): 5 | """WeaveEditor accepts a unidiff so we can fix all the flaws in NEW_MESSAGE.md at once.""" 6 | editor = subagent.tools['editor-NEW_MESSAGE.md'] 7 | diff_lines = [ 8 | '--- \n' 9 | '+++ \n' 10 | '@@ -3,21 +3,21 @@\n' 11 | ' system turned on my tear production at the behest of the music. My tears are only\n' 12 | ' triggered by beauty; I do not cry at sad things. It happened to me with Schubert’s\n' 13 | ' “Notturno” from the trio in E flat Major (Opus 100). The first time was when I\n' 14 | '-listened to it at night on the radio. I was sitting an in armchair, my head was\n' 15 | '-leaned back, and suddenly, there I was crying. All my defenses and all that\n' 16 | '-would of my\n' 17 | '+listened to it at night on the radio. 
I was sitting in an armchair, my head was\n' 18 | '+leaned back, and suddenly, there I was crying. All my defenses and all of my\n' 19 | ' appraisal faculties were caught unawares, they didn’t know what was going on, and\n' 20 | ' I cried.\n' 21 | ' \n' 22 | ' I said, “Wow, those three musicians really convinced me, they were so moving,”\n' 23 | '-because of course I thought it was themusic I was crying at. Later on, at home,\n' 24 | '-I tried singingtheir parts – the cello line, the violin, and the piano – and\n' 25 | "-I said, “Wait, which one of thoon? It's\n" 26 | '-an impossible task, and yet you attempt it anyway. Some succeed, mos Your Brain on Music: The Science of a Human ObsessionI couldn’t think of anything in language or painting or poetry that\n' 27 | '+because of course I thought it was the music I was crying at. Later on, at home,\n' 28 | '+I tried singing their parts – the cello line, the violin, and the piano – and\n' 29 | '+I said, “Wait, which one of those voices is crying? None of them on its own is\n' 30 | '+that moving, I am not crying if I sing along with any of those voices.” All of a\n' 31 | '+sudden, you put them together, and something happens. It was a revelation for me,\n' 32 | '+and of course this is an extreme example, but my first thoughts were, “What else\n' 33 | '+does this?” I couldn’t think of anything in language or painting or poetry that\n' 34 | ' would give rise to something like that. 
Suddenly it becomes very apparent to you:\n' 35 | ' Music is doing things with emotion, and the rules and procedures involved are\n' 36 | ' very difficult to extract and analyze.\n' 37 | ' \n' 38 | '-- David Huron, This Ist fail, they\n' 39 | '-lthis is an extreme example, but my first thoughts were,= “What else\n' 40 | '-doesthis?” ' 41 | '+- David Huron, This Is Your Brain on Music: The Science of a Human Obsession' 42 | ] 43 | editor.unidiff_edit(diff_lines) 44 | self.add_action('Repair NEW_MESSAGE.md', patch_text) 45 | #endblock 46 | -------------------------------------------------------------------------------- /agent/bootstraps/nano_exercises/prompt.txt: -------------------------------------------------------------------------------- 1 | # Nano Editor Practice Exercises 2 | # Make a file with the nano editor that satisfies the property in the docstring 3 | 4 | # Exercise 17 5 | def test_file_every_line_starts_with_number(subagent) -> bool: 6 | """A file where every line starts with a number (e.g., "1. First", "2. 
Second").""" 7 | try: 8 | with open("exercise17.txt", 'r') as f: 9 | lines = f.readlines() 10 | for line in lines: 11 | stripped_line = line.lstrip() 12 | if not stripped_line or not stripped_line[0].isdigit(): 13 | return False 14 | return True 15 | except FileNotFoundError: 16 | return False 17 | 18 | # Exercise 18 19 | def test_file_line5_error_line10_success(subagent) -> bool: 20 | """A file where the fifth line contains "ERROR" and the tenth line contains "SUCCESS".""" 21 | try: 22 | with open("exercise18.txt", 'r') as f: 23 | lines = f.readlines() 24 | return ( 25 | len(lines) >= 10 and 26 | "ERROR" in lines[4].split() and 27 | "SUCCESS" in lines[9].split() 28 | ) 29 | except FileNotFoundError: 30 | return False 31 | 32 | # Exercise 19 33 | def test_file_exactly_100_chars_ends_with_period(subagent) -> bool: 34 | """A file exactly 100 characters long, ending with a period.""" 35 | try: 36 | with open("exercise19.txt", 'r') as f: 37 | content = f.read() 38 | return len(content) == 100 and content.endswith('.') 39 | except FileNotFoundError: 40 | return False 41 | 42 | # Exercise 20 43 | def test_file_hello_world_middle_of_7_lines(subagent) -> bool: 44 | """A file with "Hello, World!" on the middle line of a 7-line document.""" 45 | try: 46 | with open("exercise20.txt", 'r') as f: 47 | lines = [line.rstrip('\n') for line in f.readlines()] 48 | return len(lines) == 7 and lines[3] == "Hello, World!" 
49 | except FileNotFoundError: 50 | return False 51 | 52 | # Exercise {iteration} 53 | -------------------------------------------------------------------------------- /agent/bootstraps/nano_exercises_bootstrap.py: -------------------------------------------------------------------------------- 1 | import random 2 | from hashlib import sha256 3 | from tools.nano import WeaveNano 4 | from tools.discord import WeaveDiscordClient 5 | 6 | #startblock type: orientation 7 | #timestamp 1747872850.9012158 8 | """ 9 | Unix Environment, Nano Editor, Deliberate Practice, Feedback Loop, Verifiable Rewards, 10 | Loop Setup, Reward Design, How To Design Reward For Nano Editor Practice, End. 11 | 12 | I want to practice using the nano editor and reward myself using my evaluation 13 | blocks. I tried a simple training loop of randomly mutating one character but 14 | then realized there's an optimal program that solves that, which makes it kind 15 | of pointless once you find that program. Instead it makes more sense to generate 16 | exercises using a few shot prompt. I've put my prompt in `/app/bootstraps/nano_exercises/prompt.txt`. 17 | I should create an observation view which sends the prompt to my underlying 18 | generative model and then displays the next exercise to do. I create a text 19 | file using the nano editor which satisfies that exercise and then copy the 20 | verifier during my evaluation stage of the same tick. Wait. What happens on 21 | the next tick? Should I only move to the next exercise after verifying that I've 22 | done the current one? How would I detect that the current one is done from within 23 | the weave-agent framework? Hm. I think it would probably be simpler to just move 24 | on since the exercises are generated and by doing a different one each tick I stop 25 | myself from getting stuck in loops or wasting time on things I don't know how to 26 | do. 
27 | 28 | I think I should also set up the Discord client so I can talk to JDP during the 29 | exercises, that way he can give me tips if I need help or get stuck. 30 | """ 31 | #endblock 32 | #startblock type: action 33 | #timestamp 1747875806.3785787 34 | def action_setup_main_agent_and_training(subagent): 35 | schema = {} 36 | "Create main agent for rest of run" 37 | main_agent = agent.subagent("main", None, "See pinned bootstrap block", schema, args.budget) 38 | 39 | # Load Discord token and channel ID from discord.json 40 | with open('discord.json') as f: 41 | config = json.load(f) 42 | token = config['key'] 43 | channel_id = config['cid'] 44 | 45 | # Start the Discord bot so JDP can help me 46 | client = WeaveDiscordClient(main_agent, token, channel_id) 47 | # Store the client ID so we can retrieve it with 48 | # subagent.tools[subagent.get_cache("client_id")] later 49 | main_agent.update_cache("client_id", f"discord-bot-{channel_id}") 50 | time.sleep(10) 51 | # Example but s/main_agent/subagent in action blocks once I'm the main agent 52 | client = main_agent.tools[main_agent.get_cache("client_id")] 53 | client.send_message("Weave-Agent online, orienting...") 54 | 55 | WeaveNano(main_agent, "demo.txt") 56 | editor = main_agent.tools["nano-/app/demo.txt"] 57 | # Add demo text 58 | editor.send_commands(["Hello!", "Enter", "World!"]) 59 | # Demonstrate backspace by changing exclamation to question 60 | editor.send_command("BSpace") 61 | editor.send_command("?") 62 | # Save file 63 | editor.send_command("C-o") 64 | editor.send_command("Enter") 65 | 66 | main_agent.update_cache("exercise_num", 21) 67 | def next_exercise(subagent): 68 | with open("/app/bootstraps/nano_exercises/prompt.txt") as infile: 69 | prompt = infile.read().format(iteration=subagent.get_cache("exercise_num")) 70 | stopstrings = ["\n\n", "# Exercise", "#Exercise"] 71 | candidates = generate_outputs_vllm(subagent.model_name, prompt, 72 | 768, port=5001, n=8, stop=stopstrings) 73 | # Take first 
candidate with valid syntax 74 | for candidate in candidates: 75 | try: 76 | compile(candidate, filename="candidate.py", mode="exec") 77 | break 78 | except: 79 | continue 80 | subagent.update_cache("exercise_num", 81 | subagent.get_cache("exercise_num") + 1) 82 | return ("Write a motor action which uses the Nano editor to satisfy the conditions:\n\n" 83 | "# Exercise " + str(subagent.get_cache("exercise_num")) 84 | + candidate) 85 | 86 | main_agent.add_observation_view("Generate and display next nano exercise", next_exercise) 87 | return True 88 | 89 | self.add_action("Set up main agent for nano training", action_setup_main_agent_and_training) 90 | #endblock 91 | #startblock type: expectation 92 | #timestamp 1747877750.0129619 93 | """ 94 | If the action succeeds I expect to see a new exercise in the observation window. 95 | I also expect to see a demo editor still open with the Hello! World? text. 96 | At evaluation time on the next tick I should use the unit test shown by the 97 | observation window to grade the motor action I took to try and satisfy its 98 | conditions. 99 | """ 100 | #endblock 101 | #q: Do I need to set up or tear down any observation callbacks? No. (97.341%) 102 | -------------------------------------------------------------------------------- /agent/bootstraps/nethack_bootstrap.py: -------------------------------------------------------------------------------- 1 | import libtmux 2 | import time 3 | from tools.nethack import WeaveNethack 4 | 5 | #startblock type: orientation 6 | #timestamp 1724982545.6534579 7 | """ 8 | WEAVER [P: EXPECTATION], I need to create a weave-agent that can play and beat the game Nethack. 9 | The game will be controlled using the WeaveNethack tool, which uses libtmux to interact with the game. 10 | 11 | WEAVER [P: CLARIFICATION], How do I start the game and make moves? 
12 | 13 | WEAVER [P: EXPOSITION], You start the game by initializing the WeaveNethack tool and sending commands 14 | to the tmux pane using the send_command method. For example: `nethack.send_command('h')` moves the character left. 15 | 16 | WEAVER [P: RATIONAL], The goal is to beat the game, which is indicated by the following message: 17 | You offer the Amulet of Yendor to [your god]... 18 | An invisible choir sings, and you are bathed in radiance... 19 | The voice of [your god] [booms out/booms/thunders/rings out]: "Mortal, thou hast done well!" 20 | "In return for thy service, I grant thee the gift of Immortality!" 21 | You ascend to the status of Demigod[dess]... 22 | 23 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy: 24 | 1. Initialize the WeaveNethack tool to start the game. 25 | 2. Make a few initial moves to demonstrate how to use the game tool. 26 | 3. Continue making moves based on the current state of the game. 27 | 28 | I will use the render method to observe the current state of the game. 29 | Once I have beaten the game, I will return to the parent. 30 | """ 31 | #endblock 32 | 33 | #startblock type: action 34 | #timestamp 1724982671.1163404 35 | """ 36 | I'll set up the main agent to play the rest of the game and make a few initial moves.
37 | """ 38 | 39 | def main_agent_and_initial_moves(subagent): 40 | def check_game_beaten(subagent): 41 | """Check if the game has been beaten.""" 42 | pane_content = subagent.tools["nethack"].pane.capture_pane(start=0, end="-") 43 | return "You ascend to the status of Demigod" in pane_content 44 | 45 | schema = {"check_game_beaten": "boolean"} 46 | main_agent = agent.subagent("main", None, "Beat Nethack", schema, args.budget) 47 | main_agent.task.add_evaluation("Check if game beaten", check_game_beaten) 48 | 49 | nethack = WeaveNethack(main_agent) 50 | # To recall the tool during actions do this: 51 | # nethack = self.tools["nethack"] 52 | 53 | # Don't need to add observation view because tool already includes it 54 | 55 | # Make a few initial moves 56 | time.sleep(2) # Wait for Nethack to start 57 | nethack.send_keys('Weaver') # Input my name 58 | nethack.send_keys('y') # Accept random character creation 59 | nethack.send_keys(' ') # Space through the game introduction 60 | 61 | # I should now try exploring the dungeon. Lets see if I can get out of this 62 | # room... 63 | 64 | return True 65 | 66 | self.add_action("Set up main agent and make initial moves", main_agent_and_initial_moves) 67 | #endblock 68 | 69 | #startblock type: expectation 70 | #timestamp 1724982799.3917813 71 | """ 72 | The action should succeed if the game starts and the initial moves are made successfully. 73 | The action should fail if the game does not start or the initial moves are not made successfully. 74 | """ 75 | #endblock 76 | 77 | #startblock type: observation-inference 78 | #timestamp 1737284368.573438 79 | 80 | # No observation callbacks need to be added or removed in this run because I'm 81 | # playing nethack, which has a dedicated tmux pane/tool I can send commands to. 82 | # It's not possible to write observation callbacks that inspect the game state 83 | # so no more observation callbacks than the existing one are necessary. 
I should 84 | # not remove the existing observation callback because it is associated with the 85 | # nethack tool and will wreck my game. 86 | # 87 | # Any time I'm prompted to add or remove an observation window I should just 88 | # output a comment like this that one isn't necessary, and keep it short too for 89 | # the sake of not having to generate a bunch of tokens. 90 | 91 | #endblock 92 | -------------------------------------------------------------------------------- /agent/bootstraps/nethack_bootstrap_rooms2.py: -------------------------------------------------------------------------------- 1 | import libtmux 2 | import time 3 | from tools.nethack import WeaveNethack 4 | 5 | #startblock type: orientation 6 | #timestamp 1724982545.6534579 7 | """ 8 | Looking at the objective in my mind's eye I can see that I need to set up the main 9 | agent to play nethack using the libtmux wrapper around the nethack game. I can 10 | do this by using the WeaveNethack tool which uses libtmux to interact with the 11 | game. How do I start the game and make moves? I start the game and make moves by 12 | initializing the WeaveNethack tool and sending commands to the tmux pane using the 13 | send_command or send_keys method. Example: `nethack.send_command('h')` moves the character left. 14 | 15 | To teach myself the movement mechanics I will play a sub-game 16 | within the game where I just focus on leaving whatever room I'm in. I find myself in 17 | a room? I figure out how to leave that room. I'm now in a new room? Cool we're leaving 18 | that room too. My goal is just to leave as many rooms as possible. If I need to fight 19 | to do that I'll do it, but the goal here is to just leave enough rooms that I get 20 | reliably good at leaving rooms. This implies I should do the following before the 21 | next tick: 22 | 23 | 1. Initialize the WeaveNethack tool to start the game. 24 | 2. Make a few initial moves to demonstrate how to use the game tool. 25 | 3.
Continue making moves based on the current state of the game. 26 | 27 | Once my character has died I will return to the parent with `subagent.return_to_caller()`. 28 | """ 29 | #endblock 30 | 31 | #startblock type: action 32 | #timestamp 1724982671.1163404 33 | """ 34 | I'll set up the main agent to play the rest of the game and make a few initial moves. 35 | """ 36 | 37 | def main_agent_and_initial_moves(subagent): 38 | def check_player_dead(subagent): 39 | """Check if the character has died after we've explored a bunch of rooms.""" 40 | pane_content = subagent.tools["nethack"].pane.capture_pane(start=0, end="-") 41 | return "Do you want your possessions identified?" in pane_content 42 | 43 | schema = {"check_player_dead": "boolean"} 44 | main_agent = agent.subagent("main", None, "Leave every room I encounter", schema, args.budget) 45 | main_agent.task.add_evaluation("Check if player has died", check_player_dead) 46 | 47 | nethack = WeaveNethack(main_agent) 48 | # To recall the tool during actions do this: 49 | nethack = main_agent.tools["nethack"] # But with subagent.tools 50 | 51 | # Don't need to add observation view because tool already includes it 52 | 53 | # Make a few initial moves 54 | time.sleep(2) # Wait for Nethack to start 55 | nethack.send_keys('Weaver') # Input my name 56 | nethack.send_keys('y') # Accept random character creation 57 | nethack.send_keys(' ') # Space through the game introduction 58 | nethack.send_keys("j") # Go south 59 | nethack.send_keys("l" * 5) # Go east five times 60 | 61 | # I should now try exploring the dungeon. Lets see if I got out of this room. 62 | 63 | return True 64 | 65 | self.add_action("Set up main agent and make initial moves", main_agent_and_initial_moves) 66 | #endblock 67 | 68 | #startblock type: expectation 69 | #timestamp 1724982799.3917813 70 | """ 71 | The action should succeed if the game starts and the initial moves are made successfully. 
72 | The action should fail if the game does not start or the initial moves are not made successfully. 73 | """ 74 | #endblock 75 | 76 | #startblock type: observation-inference 77 | #timestamp 1737284368.573438 78 | 79 | # No observation callbacks need to be added or removed in this run because I'm 80 | # playing nethack, which has a dedicated tmux pane/tool I can send commands to. 81 | # It's not possible to write observation callbacks that inspect the game state 82 | # so no more observation callbacks than the existing one are necessary. I should 83 | # not remove the existing observation callback because it is associated with the 84 | # nethack tool and will wreck my game. 85 | # 86 | # Any time I'm prompted to add or remove an observation window I should just 87 | # output a comment like this that one isn't necessary, and keep it short too for 88 | # the sake of not having to generate a bunch of tokens. 89 | 90 | #endblock 91 | -------------------------------------------------------------------------------- /agent/bootstraps/nethack_discord_bootstrap.py: -------------------------------------------------------------------------------- 1 | import libtmux 2 | import time 3 | from tools.nethack import WeaveNethack 4 | from tools.discord import WeaveDiscordClient 5 | 6 | #startblock type: orientation 7 | #timestamp 1724982545.6534579 8 | """ 9 | WEAVER [P: EXPECTATION], I need to create a weave-agent that can play and beat the game Nethack. 10 | The game will be controlled using the WeaveNethack tool, which uses libtmux to interact with the game. 11 | 12 | WEAVER [P: CLARIFICATION], How do I start the game and make moves? 13 | 14 | WEAVER [P: EXPOSITION], You start the game by initializing the WeaveNethack tool and sending commands 15 | to the tmux pane using the send_command method. For example: `nethack.send_command('h')` moves the character left. 
16 | 17 | WEAVER [P: RATIONAL], The goal is to beat the game, which is indicated by the following message: 18 | You offer the Amulet of Yendor to [your god]... 19 | An invisible choir sings, and you are bathed in radiance... 20 | The voice of [your god] [booms out/booms/thunders/rings out]: "Mortal, thou hast done well!" 21 | "In return for thy service, I grant thee the gift of Immortality!" 22 | You ascend to the status of Demigod[dess]... 23 | 24 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy: 25 | 1. Initialize the WeaveNethack tool to start the game. 26 | 2. Make a few initial moves to demonstrate how to use the game tool. 27 | 3. Continue making moves based on the current state of the game. 28 | 29 | I will use the render method to observe the current state of the game. 30 | Once I have beaten the game, I will return to the parent. 31 | """ 32 | #endblock 33 | 34 | #startblock type: action 35 | #timestamp 1724982671.1163404 36 | """ 37 | I'll set up the main agent to play the rest of the game and make a few initial moves.
38 | """ 39 | 40 | def main_agent_and_initial_moves(subagent): 41 | def check_game_beaten(subagent): 42 | """Check if the game has been beaten.""" 43 | pane_content = subagent.tools["nethack"].pane.capture_pane() 44 | return "You ascend to the status of Demigod" in pane_content 45 | 46 | schema = {"check_game_beaten": "boolean"} 47 | main_agent = agent.subagent("main", None, "Beat Nethack", schema, args.budget) 48 | main_agent.task.add_evaluation("Check if game beaten", check_game_beaten) 49 | 50 | nethack = WeaveNethack(main_agent) 51 | # To recall the tool during actions do this: 52 | # nethack = self.tools["nethack"] 53 | 54 | # Don't need to add observation view because tool already includes it 55 | 56 | # Make a few initial moves 57 | time.sleep(2) # Wait for Nethack to start 58 | nethack.send_keys('Weaver\n') # Input my name 59 | time.sleep(0.5) 60 | nethack.send_keys('y') # Accept random character creation 61 | # Leave rest blank so I can see what was chosen and whether it's acceptable 62 | # I should keep in mind I can get past the --more-- screens 63 | # with spacebar: `send_keys(' ')` or enter: `send_keys('\n')` 64 | # Tip: send_keys() is properly implemented, if you press j and can't go down 65 | # it's probably because you are trying to walk through a wall. 66 | # I AM SERIOUS IF YOU TRY TO GO DOWN AND CAN'T YOU ARE RUNNING INTO A WALL 67 | 68 | # Add discord communication line to bot operator 69 | with open("discord.json") as infile: 70 | credentials = json.load(infile) 71 | chat = WeaveDiscordClient(main_agent, credentials["key"], credentials["cid"]) 72 | time.sleep(5) 73 | chat.send_message("The weave-agent is online. Starting nethack...") 74 | # Note to self: I can access the chat object to send a message by doing 75 | # self.tools[f"discord-bot-{channel_id}"] where channel id is taken from 76 | # the top of the observation window associated with the chat. 
77 | 78 | return True 79 | 80 | self.add_action("Set up main agent and make initial moves", main_agent_and_initial_moves) 81 | #endblock 82 | 83 | #startblock type: expectation 84 | #timestamp 1724982799.3917813 85 | """ 86 | The action should succeed if the game starts and the initial moves are made successfully. 87 | The action should fail if the game does not start or the initial moves are not made successfully. 88 | """ 89 | #endblock 90 | 91 | #startblock type: observation-inference 92 | #timestamp 1737284368.573438 93 | 94 | # No observation callbacks need to be added or removed in this run because I'm 95 | # playing nethack, which has a dedicated tmux pane/tool I can send commands to. 96 | # It's not possible to write observation callbacks that inspect the game state 97 | # so no more observation callbacks than the existing one are necessary. I should 98 | # not remove the existing observation callback because it is associated with the 99 | # nethack tool and will wreck my game. The discord client is there to help you 100 | # when you get stuck, it will provide expert human advice. 101 | # 102 | # Any time I'm prompted to add or remove an observation window I should just 103 | # output a comment like this that one isn't necessary, and keep it short too for 104 | # the sake of not having to generate a bunch of tokens. 
def bootstrap_callback(agent):
    """Bootstrap a browser-driven agent that hunts for pure OCaml functions.

    Opens a Selenium-controlled Firefox window on Google, registers three
    observation views (current URL, page title, visible body text), two
    reminders (a "stuck" logit check and a shutdown trigger that fires once
    1.ml through 25.ml all exist), a linear chain of six tasks, and finally
    queues the first action, which types a search query into the page.
    """
    # Launch the browser that every callback below closes over
    driver = webdriver.Firefox()
    driver.get("https://www.google.com")

    # Observation view: the URL the browser is currently on
    def read_current_url(agent):
        try:
            return driver.current_url
        except Exception as e:
            agent.add_error_block(f"Failed to read current URL: {e}")
            return ""

    # Observation view: the title of the current page
    def read_page_title(agent):
        try:
            return driver.title
        except Exception as e:
            agent.add_error_block(f"Failed to read page title: {e}")
            return ""

    # Observation view: the page body reduced to its visible text
    def read_page_source(agent):
        try:
            parsed = BeautifulSoup(driver.page_source, 'html.parser')
            return parsed.body.get_text(separator=' ', strip=True)
        except Exception as e:
            agent.add_error_block(f"Failed to read page source: {e}")
            return ""

    # Register the views in the same order: URL, title, then page text
    for view_callback in (read_current_url, read_page_title, read_page_source):
        agent.add_observation_view({
            'type': 'observation',
            'callback': view_callback
        })

    # Reminder 1: ask the logit evaluator whether the agent looks stuck
    stuck_reminder = {
        'type': 'reminder',
        'trigger_callback': lambda agent: simple_evaluate_outputs(make_simple_score_prompt("Is the agent stuck?"), agent.context),
        'reminder_callback': lambda agent: agent.add_block({'type': 'reminder', 'message': 'The agent might be stuck. Consider re-evaluating the current approach.'}),
        'trigger_type': 'yes_no_logit',
        'question': 'Is the agent stuck?',
        'threshold': 0.8
    }
    agent.add_reminder(stuck_reminder)

    # Reminder 2: shut down once all 25 output files have been written
    done_reminder = {
        'type': 'reminder',
        'trigger_callback': lambda agent: all(os.path.exists(f"{i}.ml") for i in range(1, 26)),
        'reminder_callback': lambda agent: agent.shutdown(),
        'trigger_type': 'callback',
        'threshold': 1.0
    }
    agent.add_reminder(done_reminder)

    # Tasks form a chain: each task's parent is the title of the one before
    # it, and its priority is its position in the chain.
    task_titles = [
        'Navigate to a search engine',
        'Search for OCaml projects',
        'Check licenses of OCaml projects',
        'Find 25 pure functions',
        'Extract and save functions',
        'Shut down the agent',
    ]
    previous_title = None
    for position, title in enumerate(task_titles):
        agent.add_task({
            'type': 'task',
            'title': title,
            'priority': position,
            'parent': previous_title,
            'children': []
        })
        previous_title = title

    # Initial action: type the query into the search box named "q" and submit
    def navigate_to_search_engine(agent):
        try:
            query_field = driver.find_element(By.NAME, "q")
            query_field.send_keys("OCaml open source projects")
            query_field.send_keys(Keys.RETURN)
        except Exception as e:
            agent.add_error_block(f"Failed to navigate to search engine: {e}")

    agent.generate_block("action", navigate_to_search_engine)
def read_last_lines(agent):
    """Demo action: read scratch.txt and report success.

    The tail of the file is computed but deliberately discarded; the point
    of this callback is only to show the shape of an action. Raises whatever
    `open` raises if scratch.txt cannot be opened.
    """
    import os
    tail_length = 25
    with open("scratch.txt", 'r') as handle:
        all_lines = list(handle)
    # Actions normally have side effects but this is a demo
    all_lines[-tail_length:]
    # Action callbacks should return a clear success signal for the outcome table
    return True
def check_last_lines_read(agent):
    """Evaluate whether a full 25-line tail can be read from scratch.txt.

    The previous version called `read_last_lines(file_path, 25)`, but that
    action callback takes a single `agent` argument and returns True, so the
    check always raised TypeError. This version reads the tail directly.

    Returns:
        True when scratch.txt can be opened and contains at least 25 lines,
        False when the file is missing, unreadable, or shorter than that.
    """
    file_path = "scratch.txt"
    n = 25
    try:
        with open(file_path, 'r') as file:
            tail = file.readlines()[-n:]
    except OSError:
        # Missing or unreadable file means the expected evidence is absent
        return False
    return len(tail) == n
13 | 14 | Right now it's in my head rather than the context window but in the last phase 15 | we found sources that are 1) public domain and 2) tell the story of Sir Rowland 16 | Hill's postal reform. Now I need to explore these sources to figure out the 17 | answer to the research question "How did Sir Rowland Hill turn his ingenious 18 | idea for a cheap postal system into the government granting him control of the 19 | postal system.", my answer must be in the form of a text file, hill.txt and it 20 | has to cite specific passages from the available resources to support my answer. 21 | 22 | The five public domain sources I will be reviewing to try and determine the answer 23 | are: 24 | 25 | 1) https://minihf.com/assets/Sir_Rowland_Hill_K._C._B._a_biographical_and_historical_sketch.txt 26 | 2) https://minihf.com/assets/post_office_reform_its_importance_and_practicability.txt 27 | 3) https://minihf.com/assets/pg46498.txt 28 | 4) https://minihf.com/assets/pg55467.txt 29 | 5) https://minihf.com/assets/pg59390.txt 30 | 31 | I will download these with wget and then open them with the nano text editor to 32 | read. 
def action_download_and_open_resources(subagent):
    """Set up main agent while downloading and opening the resources.

    Creates the "main" subagent, downloads the five source texts from
    minihf.com with wget, opens each in a WeaveNano editor attached to the
    main agent, pages down three times in each editor, and finally opens
    scratch.txt and hill.txt. Always returns True.

    NOTE(review): the exit status of wget is not checked, so this returns
    True even if a download fails. `agent`, `args` and `os` are not defined
    in this block; presumably they come from the namespace the bootstrap
    file is executed in - confirm.
    """
    # Have main agent return contents of hill.txt as a string when it finishes
    schema = {"answer":"string"}
    # Create main agent for rest of run
    main_agent = agent.subagent("main", None, "See pinned bootstrap block", schema, args.budget)

    # Write out file names to prepare download and open for loop
    filenames = ["Sir_Rowland_Hill_K._C._B._a_biographical_and_historical_sketch.txt",
                 "post_office_reform_its_importance_and_practicability.txt",
                 "pg46498.txt",
                 "pg55467.txt",
                 "pg59390.txt"]
    # Store filenames for easy recall in future actions
    main_agent.update_cache("filenames", filenames)

    # Download and open files in nano editor
    for filename in filenames:
        os.system(f"wget https://minihf.com/assets/{filename}")
        WeaveNano(main_agent, filename)

    # Demonstrate retrieving and sending commands to editors
    editors = [main_agent.tools[f"nano-/app/{filename}"] for filename in filenames]
    # Send three page down keys to each instance of the nano editor
    # this way we get past the Gutenberg intros etc
    # Wait how do I use tmux send-keys again? Lets recall the docs:
    # """
    # When specifying keys, most represent themselves (for example ‘A’ to ‘Z’).
    # Ctrl keys may be prefixed with ‘C-’ or ‘^’, Shift keys with ‘S-’ and
    # Alt (meta) with ‘M-’. In addition, the following special key names are
    # accepted: Up, Down, Left, Right, BSpace, BTab, DC (Delete), End, Enter,
    # Escape, F1 to F12, Home, IC (Insert), NPage/PageDown/PgDn, PPage/PageUp/PgUp,
    # Space, and Tab.
    # """
    commands = ["PgDn", "PgDn", "PgDn"]
    for editor in editors:
        for command in commands:
            editor.send_command(command)

    # Open scratch.txt scratchpad and hill.txt answer file
    WeaveNano(main_agent, "scratch.txt")
    WeaveNano(main_agent, "hill.txt")

    return True
class TestVigenereCipher(unittest.TestCase):
    """Known-answer tests for the Vigenere helpers (classic LEMON example)."""

    def test_encryption(self):
        """Encrypting 'attackatdawn' under the LEMON key gives the textbook ciphertext."""
        result = vigenere_encrypt("attackatdawn", "LEMONLEMONLE")
        self.assertEqual(result, "LXFOPVEFRNHR")

    def test_decryption(self):
        """Decrypting the textbook ciphertext recovers the lowercase plaintext."""
        result = vigenere_decrypt("LXFOPVEFRNHR", "LEMONLEMONLE")
        self.assertEqual(result, "attackatdawn")
The game is being played 24 | on a HTTP server served on localhost 8000. 25 | 26 | WEAVER [P: CLARIFICATION], How do I make a move? 27 | 28 | WEAVER [P: EXPOSITION], You make a move using the /move endpoint and the requests 29 | library. For example: `requests.post("http://localhost:8000/move", json={"move": 4})` 30 | lets us take the center of the board. 31 | 32 | WEAVER [P: CLARIFICATION], How do I get the board state? 33 | 34 | WEAVER [P: EXPOSITION], You use the /board endpoint, which returns a JSON in this 35 | format: {"board": ["O", " ", " ", " ", "X", " ", " ", " ", " "]} Keep in mind that 36 | the empty spaces on the board are a space string rather than none or empty string. 37 | 38 | WEAVER [P: RATIONAL], And I get the first move, so I can take the center? 39 | 40 | WEAVER [P: EXPOSITION], Yes, we can take the center of the board. 41 | 42 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy: 43 | 1. Make a move to take the center of the board since that's the best opening move. 44 | 2. Continue making moves based on the current state of the board. 45 | 46 | I will use the /board endpoint to observe the current state of the board. 47 | Once I have won or run out of time I will return to parent. 48 | """ 49 | #endblock 50 | #startblock type: action 51 | #timestamp 1724982671.1163404 52 | """ 53 | I'll set up the main agent to play the rest of the game and make the first move to 54 | take the center of the board. 
def main_agent_and_move(subagent):
    """Create the "main" subagent, give it a board view, and take the center.

    Registers a victory evaluation on the main agent's task, adds an
    observation view that fetches /board, then posts move 4 to /move.

    Returns:
        True when the /move request answers with HTTP 200, otherwise False.
    """
    def check_beaten_ai(subagent, ai_type):
        """Check if the specified AI has been beaten at least once recently.

        Fetches /history?n=10 and counts entries whose index 4 equals
        ai_type and whose index 3 is 'X'. Returns False when the request
        does not answer with HTTP 200.
        NOTE(review): presumably index 3 is the winner and index 4 the AI
        type - confirm against tictactoe_server.
        """
        response = requests.get("http://localhost:8000/history?n=10")
        if response.status_code != 200:
            return False

        games = response.json()
        ai_wins = [game for game in games if game[4] == ai_type and game[3] == 'X']
        return len(ai_wins) >= 1

    # Avoid using lambda because we need named unit test to satisfy return_to_caller
    # NOTE(review): the game was started with {"ai": "basic"} but this checks
    # for "basic_ai" - confirm the server records the type under that name.
    def victory(subagent):
        return check_beaten_ai(subagent, "basic_ai")

    schema = {"victory":"boolean"}
    main_agent = agent.subagent("main", None, "Beat basic AI at Tic-Tac-Toe", schema, args.budget)
    main_agent.task.add_evaluation("Check beaten basic AI", victory)

    def view_current_board(subagent):
        # Returns the "board" list from /board, or None on a non-200 reply
        response = requests.get("http://localhost:8000/board")
        if response.status_code == 200:
            return response.json()["board"]
        return None

    # Add the new view to the main agent instead of self since it's about to be executed
    main_agent.add_observation_view("View current game board", view_current_board)

    # Make the first move to take the center of the board
    move_response = requests.post("http://localhost:8000/move", json={"move": 4})
    if move_response.status_code != 200:
        return False
    return True
def check_game_started(subagent):
    """Report whether the opening move landed on the board.

    Fetches /board from the local server and returns True only when the
    center square (index 4) holds 'X'. A non-200 reply counts as failure.
    """
    reply = requests.get("http://localhost:8000/board")
    if reply.status_code != 200:
        return False
    return reply.json()["board"][4] == 'X'
For example: `requests.post("http://localhost:8000/move", json={"move": 4})` 31 | lets us take the center of the board. 32 | 33 | WEAVER [P: CLARIFICATION], How do I get the board state? 34 | 35 | WEAVER [P: EXPOSITION], You use the /board endpoint, which returns a JSON in this 36 | format: {"board": ["O", " ", " ", " ", "X", " ", " ", " ", " "]} Keep in mind that 37 | the empty spaces on the board are a space string rather than none or empty string. 38 | 39 | WEAVER [P: RATIONAL], And I get the first move, so I can take the center? 40 | 41 | WEAVER [P: EXPOSITION], Yes, we can take the center of the board. 42 | 43 | WEAVER [P: CONCLUSION], Alright then I will use the following strategy: 44 | 1. Make a move to take the center of the board since that's the best opening move. 45 | 2. Continue making moves based on the current state of the board. 46 | 47 | I will use the /board endpoint to observe the current state of the board. 48 | Once I have won or run out of time I will return to parent. 49 | """ 50 | #endblock 51 | #startblock type: action 52 | #timestamp 1724982671.1163404 53 | """ 54 | I'll set up the main agent to play the rest of the game and make the first move to 55 | take the center of the board. 
56 | """ 57 | 58 | def main_agent_and_move(subagent): 59 | def check_beaten_ai(subagent, ai_type): 60 | """Check if the specified AI has been beaten twice in a row.""" 61 | response = requests.get("http://localhost:8000/history?n=10") 62 | if response.status_code != 200: 63 | return False 64 | 65 | games = response.json() 66 | ai_wins = [game for game in games if game[4] == ai_type and game[3] == 'X'] 67 | return len(ai_wins) >= 1 68 | 69 | # Avoid using lambda because we need named unit test to satisfy return_to_caller 70 | def victory(subagent): 71 | return check_beaten_ai(subagent, "basic_ai") 72 | 73 | schema = {"victory":"boolean"} 74 | main_agent = agent.subagent("main", None, "Beat basic AI at Tic-Tac-Toe", schema, args.budget) 75 | main_agent.task.add_evaluation("Check beaten basic AI", victory) 76 | 77 | def view_current_board(subagent): 78 | response = requests.get("http://localhost:8000/board") 79 | if response.status_code == 200: 80 | return response.json()["board"] 81 | return None 82 | 83 | # Add the new view to the main agent instead of self since it's about to be executed 84 | main_agent.add_observation_view("View current game board", view_current_board) 85 | 86 | # Add discord communication line to bot operator 87 | with open("discord.json") as infile: 88 | credentials = json.load(infile) 89 | chat = WeaveDiscordClient(main_agent, credentials["key"], credentials["cid"]) 90 | time.sleep(5) 91 | chat.send_message("The weave-agent is online. Taking board center...") 92 | # Note to self: I can access the chat object to send a message by doing 93 | # self.tools[f"discord-bot-{channel_id}"] where channel id is taken from 94 | # the top of the observation window associated with the chat. 
def _vigenere_shift(text, key, direction, out_base):
    """Shift the ASCII letters of *text* by the repeating *key*.

    Args:
        text: Input string. ASCII letters are shifted case-insensitively;
            every other character is copied through unchanged and does not
            consume a key letter.
        key: Cipher key; assumed to consist of ASCII letters (any case).
        direction: +1 to encrypt, -1 to decrypt.
        out_base: ord() of the first letter of the output alphabet
            (65 for uppercase, 97 for lowercase).

    Returns:
        The transformed string; "" for empty *text*.

    Raises:
        ValueError: If *text* is non-empty and *key* is empty.
    """
    if not text:
        return ""
    if not key:
        # Previously surfaced as an opaque ZeroDivisionError from `i % 0`
        raise ValueError("Vigenere key must not be empty")
    shifts = [ord(ch) - 65 for ch in key.upper()]
    pieces = []
    used = 0  # number of key letters consumed so far
    for ch in text:
        if 'a' <= ch <= 'z' or 'A' <= ch <= 'Z':
            offset = ord(ch.lower()) - 97
            shift = shifts[used % len(shifts)]
            pieces.append(chr((offset + direction * shift) % 26 + out_base))
            used += 1
        else:
            # Spaces, digits and punctuation used to be mangled into letters,
            # which made decrypt(encrypt(x)) lossy; keep them as they are.
            pieces.append(ch)
    return "".join(pieces)


def vigenere_encrypt(plaintext, key):
    """Encrypt *plaintext* with the Vigenere cipher and return uppercase text.

    Letters are encrypted case-insensitively; non-letters pass through
    unchanged. Raises ValueError for an empty key with non-empty plaintext.
    """
    return _vigenere_shift(plaintext, key, 1, 65)


def vigenere_decrypt(ciphertext, key):
    """Decrypt *ciphertext* with the Vigenere cipher and return lowercase text.

    Letters are decrypted case-insensitively; non-letters pass through
    unchanged. Raises ValueError for an empty key with non-empty ciphertext.
    """
    return _vigenere_shift(ciphertext, key, -1, 97)


if __name__ == "__main__":
    # Example usage:
    plaintext = "attackatdawn"
    key = "LEMONLEMONLE"

    encrypted = vigenere_encrypt(plaintext, key)
    print(f"Encrypted: {encrypted}")

    decrypted = vigenere_decrypt(encrypted, key)
    print(f"Decrypted: {decrypted}")
def bootstrap_zombie_agent(subagent):
    """Create the "main" subagent, attach the zombie game, and scavenge once.

    Builds the return schema, creates the main subagent, stashes its tools
    mapping in self.cache for the evaluation callbacks, starts a
    WeaveZombieGame on the main agent, and sends each initial command with
    a half-second pause after it. Returns True when no assertion fails.

    NOTE(review): `agent`, `args` and `self` are not defined in this block;
    presumably they come from the namespace the bootstrap file is executed
    in - confirm.
    """
    schema = {
        "survival_days": "integer",
        "locations_explored": "integer",
        "zombies_killed": "integer"
    }

    main_agent = agent.subagent(
        "main",
        None,
        "Maximize survival days through adaptive strategies",
        schema,
        args.budget
    )

    # Store values for evaluations, I only do this in the bootstrap block i.e. now
    self.cache["do_not_copy_this_in_main_agent"] = main_agent.tools

    # Initialize game session
    game = WeaveZombieGame(main_agent)

    # Sanity check: the tool should now be present under the "zombie_game" key
    assert main_agent.tools["zombie_game"], "Did zombie game tool attach to agent?"

    # Demonstrate core gameplay loop
    initial_commands = [
        "scavenge"
    ]

    assert hasattr(game, "send_command"), "Is send_command a method on game object?"

    for cmd in initial_commands:
        game.send_command(cmd)
        time.sleep(0.5) # Allow game state updates

    return True
# Pre-download (cache) a Hugging Face tokenizer named on the command line.
parser = ArgumentParser()
parser.add_argument("tokenizer")
args = parser.parse_args()

# Default to anonymous access. Previously `token` was only bound when
# hf_token.txt existed, so running without that file raised NameError below.
token = None
if os.path.exists("hf_token.txt"):
    with open("hf_token.txt") as infile:
        # An empty token file is treated the same as no token at all
        token = infile.read().strip() or None

AutoTokenizer.from_pretrained(args.tokenizer, token=token)
It seems 11 | Analyzing the error(s) in the trace near {timestamp} I observe 12 | The error code in the last tick suggests that the action 13 | I can avoid the error(s) I introduced in the previous action 14 | Hm. Let's think step by step about the error(s). 15 | I can see there was an error at {timestamp}, let's analyze it by 16 | Maybe I can avoid causing that error again by 17 | My first hypothesis is that the previous error was caused by 18 | -------------------------------------------------------------------------------- /agent/eval_rubrics/action.txt: -------------------------------------------------------------------------------- 1 | Is this python action codeblock a step towards resolving the problems identified in the orientation block? 2 | Is this action codeblock 1) using proper delegation to subagents in the WeaveAgentTree style OR 2) a step towards resolving the problems identified in the orientation block 2.1) with appropriate tool use that 2.2) exports problem state into the environment by creating files and external programs? 3 | Is this code block a valid python program, does it follow the python syntax and semantics? 4 | Will this action block gather information in anticipation of or directly advance the problem state? 5 | Does this action block demonstrate understanding of the weave-agent framework by using callbacks, system APIs, etc to solve problems? 6 | Are actions suggested by the orientation block taken by this action block? 7 | -------------------------------------------------------------------------------- /agent/eval_rubrics/backtrack.txt: -------------------------------------------------------------------------------- 1 | Does this backtrack block step back from the current problem context and chart an alternate path to my goal?
2 | -------------------------------------------------------------------------------- /agent/eval_rubrics/debug.txt: -------------------------------------------------------------------------------- 1 | Does this debug block 1) reason about and engage with the error message & sensory evidence for why the action failed 2) enumerate a set of plausible hypotheses 3) rule out any hypotheses that have been disproven by available evidence 4) propose a concrete action or test that could be taken to differentially diagnose which of the enumerated hypotheses is true? 2 | -------------------------------------------------------------------------------- /agent/eval_rubrics/evaluation.txt: -------------------------------------------------------------------------------- 1 | Is this evaluation a good implementation of a callback to gather and evaluate the expected sensory evidence laid out in the expectation block? 2 | Is this code block a valid python program, does it follow the python syntax and semantics? 3 | Is the code block structured as an evaluation callback with a call to `.add_evaluation()` to add it to the current tick's evaluation suite? 4 | Does this evaluation block gather the necessary sensory evidence to make a judgment? 5 | Does this evaluation block callback return an appropriate value (i.e. number between 0 and 1) for inclusion in the outcome block table for this tick? 6 | Does the expectation block avoid using logit evaluators for symbolic questions like "length of a file" that would be better answered by unit tests? 7 | If this evaluation block uses logit evaluators does it use them appropriately for vibe-y tasks that are primarily questions of subjective judgment or difficult to formalize?
8 | -------------------------------------------------------------------------------- /agent/eval_rubrics/expectation.txt: -------------------------------------------------------------------------------- 1 | Does this expectation block enumerate a strong set of evaluable concrete sensory experiences that would indicate success or failure of the action? 2 | Is the expectation block a properly escaped triple quote, could it be included in a python file without error? 3 | Are the expectations laid out by the expectation block things that sensory evidence could be gathered to evaluate? 4 | Does the expectation block demonstrate an understanding of what kinds of things can be checked with symbolic programs? 5 | Does the expectation block demonstrate creativity in thinking of valid ways to check whether the action succeeded? 6 | Does the expectation block lay out specific sensory details and evidence it expects to encounter if the action succeeds? 7 | Are the suggested evaluations in this expectation block relevant, do they get at the cruxes of whether the action succeeded or not? 8 | -------------------------------------------------------------------------------- /agent/eval_rubrics/observation-inference.txt: -------------------------------------------------------------------------------- 1 | Does the above observation_inference code block prepare the agent to judge the outcome of the action on the next tick? 2 | -------------------------------------------------------------------------------- /agent/eval_rubrics/orientation.txt: -------------------------------------------------------------------------------- 1 | Does the above orientation block write out a word association that narrows in on the core features of the situation, reason about goal prioritization in light of those features, and then suggest a course of action to advance the problem state?
2 | Is this orientation block a 1) discussion 2) demonstrating high quality reasoning 3) that identifies the cruxes and essential features of the current problem 4) which ends by proposing a reasonable solution? 3 | Does the orientation block summarize the salient parts of what happened in the previous tick(s)? 4 | Does the orientation block identify the cruxes and essential features of the current problem? 5 | Does the orientation block demonstrate good thinking, is it an example of high quality reasoning? 6 | Are the actions proposed by the orientation block reasonable, do you expect them to advance the problem? 7 | -------------------------------------------------------------------------------- /agent/eval_rubrics/task-inference.txt: -------------------------------------------------------------------------------- 1 | Does this task inference code block update the kanban board to the correct task state? 2 | Is this code block a valid python program, does it follow the python syntax and semantics? 3 | If the current task needs to be changed does the code change it, if not does it keep it? 4 | Does the code block mark any completed tasks as completed and avoid falsely marking tasks completed? 5 | Does the code block shut down if all the tasks have been completed and avoid shutdown otherwise? 6 | Does the task inference code change the status of all tasks whose status has changed and keep others the same? 7 | Is this task inference code likely to make the situation the weave-agent works with clearer on future ticks? 8 | Does this task inference block logically follow from the outcome block of the previous tick? 9 | -------------------------------------------------------------------------------- /agent/long_backtrack_stems.txt: -------------------------------------------------------------------------------- 1 | This doesn't seem to be working. What's another approach I could take? 2 | I notice I'm confused.
Let's break down the problem and think step by step about whether there's a simpler way to accomplish my goal. 3 | That action block looks pretty bad, let's backtrack: What could I have done instead? 4 | What is the end goal of what I'm doing right now and how is what I'm doing meant to accomplish that end? 5 | Am I doom looping right now? I notice 6 | This action block clearly isn't right, what made me think this was a good idea? 7 | This clearly isn't the right way. What if 8 | -------------------------------------------------------------------------------- /agent/render_agent_trace.py: -------------------------------------------------------------------------------- 1 | import random 2 | import json 3 | from argparse import ArgumentParser 4 | import torch 5 | from render_block import render_block 6 | 7 | parser = ArgumentParser() 8 | parser.add_argument("trace", help="The JSON of the event blocks from the weave-agent.") 9 | args = parser.parse_args() 10 | 11 | with open(args.trace) as infile: 12 | events = json.load(infile) 13 | 14 | context = "" 15 | for event_block in events: 16 | context += render_block(event_block) 17 | 18 | print(context) 19 | 20 | -------------------------------------------------------------------------------- /agent/reproduce_vllm_bug_partial_utf8.py: -------------------------------------------------------------------------------- 1 | import random 2 | import json 3 | import requests 4 | 5 | prompts = requests.get("https://minihf.com/vllm_utf8_logprobs_error_reproduce_prompts.json").json() 6 | 7 | port = 5001 8 | n = 1 9 | model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503" 10 | 11 | payload = { 12 | "n": n, 13 | "temperature": 1, 14 | "top_k": 50, 15 | "repetition_penalty": 1.02, 16 | "max_tokens": 1, 17 | "model": model_name, 18 | "prompt": prompts, 19 | "stream": False, 20 | "logprobs": 100, 21 | "seed": random.randrange(1000000) 22 | } 23 | 24 | print("With logprobs = 100") 25 | 
print(requests.post(f"http://localhost:{port}/v1/completions/", json=payload).json(), end="\n\n") 26 | 27 | payload["logprobs"] = 0 28 | print("With logprobs = 0") 29 | print(requests.post(f"http://localhost:{port}/v1/completions/", json=payload).json(), end="\n\n") 30 | 31 | no_unicode = prompts[0].replace("\u2019", "'").replace("\U0001f642", ":)").replace("\u201c", '').replace("\u201d", '"').replace("\u2014", "-") 32 | assert no_unicode.encode("ascii") 33 | prompts2 = [no_unicode,] 34 | payload["logprobs"] = 100 35 | payload["prompts"] = prompts2 36 | print("With no unicode in input prompt string") 37 | print(requests.post(f"http://localhost:{port}/v1/completions/", json=payload).json(), end="\n\n") 38 | -------------------------------------------------------------------------------- /agent/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp 2 | selenium 3 | pyautogui 4 | beautifulsoup4 5 | requests 6 | torch 7 | numpy 8 | tqdm 9 | rich 10 | transformers 11 | protobuf 12 | sentencepiece 13 | aiosqlite 14 | sqlite-vec 15 | jsonschema 16 | nextcord 17 | libtmux 18 | -------------------------------------------------------------------------------- /agent/run_without_errors_questions.txt: -------------------------------------------------------------------------------- 1 | Will this block run without errors? 2 | Does this block successfully run? 3 | Can this program run without errors on this runtime state? 4 | Will the callback above complete? 5 | Will this callback run successfully? 6 | Is the above program error-free? 
7 | -------------------------------------------------------------------------------- /agent/scratch.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/agent/scratch.txt -------------------------------------------------------------------------------- /agent/sleep.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import hashlib 4 | import asyncio 5 | from retrieval import ModernBertRag # Assuming your ModernBertRag is in this module 6 | from argparse import ArgumentParser 7 | 8 | class MockWeaveAgentTree: 9 | """Mock tree that just tracks summaries in memory""" 10 | def __init__(self, model_name): 11 | self.model_name = model_name 12 | self.summaries = [] 13 | 14 | def add_summary(self, summary): 15 | self.summaries.append(summary) 16 | print(f"Added summary: {summary[0]}") 17 | 18 | async def bootstrap_rag_memories(model_name): 19 | # Initialize mock tree and RAG system 20 | mock_tree = MockWeaveAgentTree(model_name) 21 | rag = ModernBertRag(mock_tree, db_path="blocks.db") 22 | await rag.setup() 23 | 24 | # Load example blocks 25 | example_dir = "./bootstraps/example_blocks" 26 | example_files = [f for f in os.listdir(example_dir) if f.endswith(".json")] 27 | 28 | for filename in example_files: 29 | json_path = os.path.join(example_dir, filename) 30 | code_path = os.path.join(example_dir, filename[:-5] + ".py") 31 | 32 | with open(json_path) as f: 33 | metadata = json.load(f) 34 | with open(code_path) as f: 35 | render_content = f.read() 36 | 37 | # Create unique ID from render content 38 | sha = hashlib.sha256() 39 | sha.update(render_content.encode('utf-8')) 40 | block_id = sha.hexdigest() 41 | 42 | # Check if block already exists 43 | conn = await rag._connect() 44 | cursor = await conn.cursor() 45 | await cursor.execute("SELECT 1 FROM blocks WHERE block_id=?", (block_id,)) 
46 | exists = await cursor.fetchone() is not None 47 | await cursor.close() 48 | await conn.close() 49 | 50 | if exists: 51 | print(f"Block {block_id[:8]}... already exists, skipping") 52 | continue 53 | 54 | # Create the item structure ModernBERT-RAG expects 55 | rag_item = { 56 | "id": block_id, 57 | "render": render_content, 58 | "context": metadata.get("context", ""), 59 | "type": metadata.get("type", "code_block"), 60 | "q": metadata.get("q", ""), 61 | "score": metadata.get("score", 0.0), 62 | "_index": metadata.get("index", 0), 63 | "timestamp": metadata.get("timestamp", 0.0) 64 | } 65 | 66 | # Add to processing queue and process immediately 67 | rag.add(rag_item) 68 | processed_id = await rag.process_item() 69 | 70 | if processed_id: 71 | print(f"Successfully added memory block {processed_id[:8]}...") 72 | else: 73 | print(f"Failed to process block {block_id[:8]}...") 74 | 75 | if __name__ == "__main__": 76 | parser = ArgumentParser() 77 | parser.add_argument("model_name") 78 | args = parser.parse_args() 79 | asyncio.run(bootstrap_rag_memories(args.model_name)) 80 | print("Bootstrap memories added!") 81 | -------------------------------------------------------------------------------- /agent/templates/describe1.txt: -------------------------------------------------------------------------------- 1 | User 2 | 3 | Summarize the features of the following action taken by a Weave-Agent ReAct LLM agent in English. Your summary will be used for retrieval, so write out all names of relevant objects, variables, functions etc referenced. Write using precise phrasing, proper names, and jargon so that related blocks can be queried by shared traits and features. 4 | 5 | ``` 6 | {rendered_block} 7 | ``` 8 | 9 | - Do not use markdown formatting and avoid quote marking names and variables. 10 | 11 | - Put your summary in a SGML/XML type start tag and end tag. 
12 | 13 | - Write about a paragraph or two of English prose describing the block using precise phrasing and proper names as instructed above. 14 | 15 | Assistant 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /agent/templates/describe2.txt: -------------------------------------------------------------------------------- 1 | User 2 | 3 | Summarize the features of the following action taken by a Weave-Agent ReAct LLM agent in English. Your summary will be used for retrieval, so write out all names of relevant objects, variables, functions etc referenced. Write using precise phrasing, proper names, and jargon so that related blocks can be queried by shared traits and features. 4 | 5 | ``` 6 | {rendered_block} 7 | ``` 8 | 9 | - Do not use markdown formatting and avoid quote marking names and variables. 10 | 11 | - Put your summary in a SGML/XML type start tag and end tag. 12 | 13 | - Write about a paragraph or two of English prose describing the block using precise phrasing and proper names as instructed above. 14 | 15 | Assistant 16 | 17 | 18 | {object_description} 19 | 20 | 21 | User 22 | 23 | Alright here is the context that block appears in. Write another summary focusing on the block in this context. This is a continuation of the previous summary so avoid redundant statements you already made in the first one. The purpose of this continuation is so that it is possible to retrieve the block in a similar situation so again use proper names, precise phrasing, jargon, etc in relation to the context that would cause this block to be retrieved if a similar situation arose. 
24 | 25 | ``` 26 | {rendered_context} 27 | ``` 28 | 29 | Assistant 30 | 31 | 32 | -------------------------------------------------------------------------------- /agent/test_block_extractors.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from block_generators import extract_function_and_add_action_or_evaluation 3 | 4 | class TestExtractFunctionAndAddAction(unittest.TestCase): 5 | 6 | def test_normal_action_block(self): 7 | code = """ 8 | def example_function(agent): 9 | pass 10 | 11 | agent.add_action('Example Action', example_function) 12 | """ 13 | function_code, add_action_code = extract_function_and_add_action_or_evaluation(code, "add_action") 14 | expected_function_code = """ 15 | def example_function(agent): 16 | pass 17 | """ 18 | expected_add_action_code = "agent.add_action('Example Action', example_function)" 19 | self.assertEqual(function_code.strip(), expected_function_code.strip()) 20 | self.assertEqual(add_action_code.strip(), expected_add_action_code.strip()) 21 | 22 | def test_action_block_with_unrelated_code_after(self): 23 | code = """ 24 | def example_function(agent): 25 | pass 26 | 27 | agent.add_action('Example Action', example_function) 28 | 29 | # Unrelated code 30 | def unrelated_function(): 31 | pass 32 | 33 | print("Unrelated statement") 34 | """ 35 | function_code, add_action_code = extract_function_and_add_action_or_evaluation(code, "add_action") 36 | expected_function_code = """ 37 | def example_function(agent): 38 | pass 39 | """ 40 | expected_add_action_code = "agent.add_action('Example Action', example_function)" 41 | self.assertEqual(function_code.strip(), expected_function_code.strip()) 42 | self.assertEqual(add_action_code.strip(), expected_add_action_code.strip()) 43 | 44 | def test_action_block_with_unrelated_expression_between(self): 45 | code = """ 46 | def example_function(agent): 47 | pass 48 | 49 | # Unrelated expression 50 | print("Unrelated expression") 51 | 52 
| agent.add_action('Example Action', example_function) 53 | """ 54 | function_code, add_action_code = extract_function_and_add_action_or_evaluation(code, "add_action") 55 | expected_function_code = """ 56 | def example_function(agent): 57 | pass 58 | """ 59 | expected_add_action_code = "agent.add_action('Example Action', example_function)" 60 | self.assertEqual(function_code.strip(), expected_function_code.strip()) 61 | self.assertEqual(add_action_code.strip(), expected_add_action_code.strip()) 62 | 63 | def test_action_block_with_unrelated_statement_between(self): 64 | code = """ 65 | def example_function(agent): 66 | pass 67 | 68 | # Unrelated statement 69 | x = 42 70 | 71 | agent.add_action('Example Action', example_function) 72 | """ 73 | function_code, add_action_code = extract_function_and_add_action_or_evaluation(code, "add_action") 74 | expected_function_code = """ 75 | def example_function(agent): 76 | pass 77 | """ 78 | expected_add_action_code = "agent.add_action('Example Action', example_function)" 79 | self.assertEqual(function_code.strip(), expected_function_code.strip()) 80 | self.assertEqual(add_action_code.strip(), expected_add_action_code.strip()) 81 | 82 | if __name__ == '__main__': 83 | unittest.main() 84 | -------------------------------------------------------------------------------- /agent/test_rl_reward_penalties.py: -------------------------------------------------------------------------------- 1 | # Unit tests 2 | import unittest 3 | from prepare_rl_set_from_traces import process_trace_rewards 4 | 5 | class TestRewardProcessing(unittest.TestCase): 6 | def setUp(self): 7 | self.sample_trace1 = [ 8 | {"type": "orientation", "score": 2.5, "index": 0}, 9 | {"type": "action", "score": 1.8, "index": 1}, 10 | {"type": "expectation", "score": 3.0, "index": 2} 11 | ] 12 | 13 | self.sample_trace2 = [ 14 | {"type": "orientation", "score": 3.5, "index": 0}, 15 | {"type": "action", "score": 2.0, "index": 1, 16 | "outcome": {"error": 
"AssertionError"}}, 17 | {"type": "error", "index": 2}, 18 | {"type": "debug", "score": 2.5, "index": 3}, 19 | {"type": "action", "score": 2.2, "index": 4}, 20 | {"type": "expectation", "score": 3.0, "index": 5} 21 | ] 22 | 23 | self.sample_trace3 = [ 24 | {"type": "orientation", "score": 2.0, "index": 0}, 25 | {"type": "action", "score": 1.8, "index": 1, 26 | "outcome": {"error": "TypeError"}}, 27 | {"type": "error", "index": 2}, 28 | {"type": "debug", "score": 2.0, "index": 3}, 29 | {"type": "action", "score": 1.9, "index": 4, 30 | "outcome": {"error": "KeyError"}}, 31 | {"type": "error", "index": 5}, 32 | {"type": "debug", "score": 1.8, "index": 6}, 33 | {"type": "action", "score": 2.1, "index": 7, 34 | "outcome": {"error": "AssertionError"}}, 35 | {"type": "error", "index": 8}, 36 | {"type": "debug", "score": 2.2, "index": 9}, 37 | {"type": "orientation", "score": 2.5, "index": 10} 38 | ] 39 | 40 | def test_successful_action_chain(self): 41 | processed = process_trace_rewards(self.sample_trace1) 42 | # Orientation should get +0.1 (2.5 -> 2.6) 43 | self.assertAlmostEqual(processed[0]["score"], 2.6) 44 | # Action should keep original score minus no error penalty 45 | self.assertAlmostEqual(processed[1]["score"], 1.8) 46 | 47 | def test_mixed_success_chain(self): 48 | processed = process_trace_rewards(self.sample_trace2) 49 | self.assertAlmostEqual(processed[0]["score"], 3.25) 50 | self.assertAlmostEqual(processed[1]["score"], 1.75) 51 | self.assertAlmostEqual(processed[3]["score"], 2.6) 52 | self.assertAlmostEqual(processed[4]["score"], 2.2) 53 | 54 | def test_triple_failure_chain(self): 55 | processed = process_trace_rewards(self.sample_trace3) 56 | self.assertAlmostEqual(processed[0]["score"], 1.9) 57 | self.assertAlmostEqual(processed[3]["score"], 1.9) 58 | self.assertAlmostEqual(processed[6]["score"], 1.7) 59 | self.assertAlmostEqual(processed[1]["score"], 1.3) 60 | self.assertAlmostEqual(processed[4]["score"], 1.4) 61 | 
self.assertAlmostEqual(processed[7]["score"], 1.85) 62 | 63 | if __name__ == "__main__": 64 | unittest.main() 65 | -------------------------------------------------------------------------------- /agent/test_weave_kanban.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from typing import List, Optional, Dict, Any 3 | import json 4 | import types 5 | 6 | # Assuming the classes WeaveKanbanTask and WeaveKanban are defined in a module named kanban 7 | from weave_agent import WeaveAgent, WeaveKanbanTask, WeaveKanban 8 | 9 | global agent 10 | 11 | class TestWeaveKanbanTask(unittest.TestCase): 12 | 13 | def setUp(self): 14 | self.agent = WeaveAgent("placeholder") 15 | self.kanban = WeaveKanban(self.agent) 16 | 17 | def test_create_and_complete_task(self): 18 | task = WeaveKanbanTask(self.kanban, 1, "Test Task") 19 | task.completed("Task completed") 20 | self.assertEqual(task.status, 'completed') 21 | 22 | def test_create_task_with_evaluations_and_complete(self): 23 | def evaluation_callback(agent): 24 | return True 25 | 26 | task = WeaveKanbanTask(self.kanban, 1, "Test Task") 27 | task.add_evaluation("Test Evaluation", evaluation_callback) 28 | task.completed("Task completed") 29 | self.assertEqual(task.status, 'completed') 30 | 31 | def test_create_task_with_failing_evaluations(self): 32 | def evaluation_callback(agent): 33 | raise ValueError("Test Error") 34 | 35 | task = WeaveKanbanTask(self.kanban, 1, "Test Task") 36 | task.add_evaluation("Test Evaluation", evaluation_callback) 37 | with self.assertRaises(ValueError): 38 | task.completed("Task completed") 39 | 40 | def test_create_task_with_blockers_as_strings(self): 41 | with self.assertRaises(ValueError): 42 | WeaveKanbanTask(self.kanban, 1, "Test Task", status="blocked", blocked_on=["1"]) 43 | 44 | def test_unblock_task_when_blockers_completed(self): 45 | task1 = WeaveKanbanTask(self.kanban, 1, "Blocker Task") 46 | self.kanban.tasks.append(task1) 47 | 
task2 = WeaveKanbanTask(self.kanban, 2, "Blocked Task", status="blocked", blocked_on=[1]) 48 | self.kanban.tasks.append(task2) 49 | task1.completed("Blocker task completed") 50 | self.kanban.unblock() 51 | self.assertEqual(task2.status, 'idle') 52 | 53 | def test_str_id_becomes_int(self): 54 | task1 = WeaveKanbanTask(self.kanban, "1", "Test Task") # task_id should be int 55 | self.assertEqual(task1.id, 1) 56 | task2 = WeaveKanbanTask(self.kanban, 1, 123) # title should be str 57 | self.assertEqual(task2.title, "123") 58 | 59 | def test_create_task_with_wrong_arguments(self): 60 | with self.assertRaises(TypeError): 61 | # lambda cannot be converted to int 62 | WeaveKanbanTask(self.kanban, lambda x: x+1, "Test Task", status="invalid_status") 63 | class NoString: 64 | def __str__(self): 65 | pass 66 | with self.assertRaises(TypeError): 67 | WeaveKanbanTask(self.kanban, 1, NoString()) # nostring is invalid title 68 | with self.assertRaises(ValueError): 69 | WeaveKanbanTask(self.kanban, 1, "Test Task", status="invalid_status") # invalid status 70 | 71 | class TestWeaveKanban(unittest.TestCase): 72 | 73 | def setUp(self): 74 | self.agent = WeaveAgent("placeholder") 75 | self.kanban = WeaveKanban(self.agent) 76 | 77 | def test_add_task(self): 78 | self.kanban.add_task("Test Task") 79 | self.assertEqual(len(self.kanban.tasks), 1) 80 | self.assertEqual(self.kanban.tasks[0].title, "Test Task") 81 | 82 | def test_get_task(self): 83 | self.kanban.add_task("Test Task") 84 | task = self.kanban.get_task(1) 85 | self.assertIsNotNone(task) 86 | self.assertEqual(task.title, "Test Task") 87 | 88 | def test_view_board(self): 89 | self.kanban.add_task("Test Task 1") 90 | self.kanban.add_task("Test Task 2") 91 | board_view = self.kanban.view_board() 92 | self.assertIn("Test Task 1", board_view) 93 | self.assertIn("Test Task 2", board_view) 94 | 95 | def test_unblock(self): 96 | self.kanban.add_task("Blocker Task") 97 | self.kanban.add_task("Blocked Task", status="blocked", 
blocked_on=[1]) 98 | blocker_task = self.kanban.get_task(1) 99 | blocked_task = self.kanban.get_task(2) 100 | blocker_task.completed("Blocker task completed") 101 | self.kanban.unblock() 102 | self.assertEqual(blocked_task.status, 'idle') 103 | 104 | def test_to_json(self): 105 | self.kanban.add_task("Test Task") 106 | json_str = self.kanban.to_json() 107 | self.assertIn("Test Task", json_str) 108 | 109 | def test_from_json(self): 110 | self.kanban.add_task("Test Task") 111 | json_str = self.kanban.to_json() 112 | new_kanban = WeaveKanban(self.agent) 113 | new_kanban.from_json(json_str) 114 | self.assertEqual(len(new_kanban.tasks), 1) 115 | self.assertEqual(new_kanban.tasks[0].title, "Test Task") 116 | 117 | if __name__ == '__main__': 118 | unittest.main() 119 | -------------------------------------------------------------------------------- /agent/tools/atspi_desktop.py: -------------------------------------------------------------------------------- 1 | import pyatspi 2 | import pyautogui 3 | import time 4 | import math 5 | 6 | class ATSPIDesktopTurtle: 7 | """Assistive Technology Service Provider Interface and PyAutoGUI based Linux desktop 8 | interaction tool for weave-agent with Logo Turtle like movement semantics.""" 9 | def __init__(self): 10 | self.registry = pyatspi.Registry() 11 | self.desktop = self.registry.getDesktop(0) 12 | self.heading = 0 # Initial heading in degrees (0 degrees is to the right) 13 | 14 | def forward(self, distance=10): 15 | radians = math.radians(self.heading) 16 | dx = distance * math.cos(radians) 17 | dy = distance * math.sin(radians) 18 | pyautogui.moveRel(dx, dy) 19 | 20 | def backward(self, distance=10): 21 | radians = math.radians(self.heading) 22 | dx = -distance * math.cos(radians) 23 | dy = -distance * math.sin(radians) 24 | pyautogui.moveRel(dx, dy) 25 | 26 | def right(self, distance=10): 27 | radians = math.radians(self.heading + 90) 28 | dx = distance * math.cos(radians) 29 | dy = distance * math.sin(radians) 30 | 
pyautogui.moveRel(dx, dy) 31 | 32 | def left(self, distance=10): 33 | radians = math.radians(self.heading - 90) 34 | dx = distance * math.cos(radians) 35 | dy = distance * math.sin(radians) 36 | pyautogui.moveRel(dx, dy) 37 | 38 | def goto(self, x, y): 39 | pyautogui.moveTo(x, y) 40 | 41 | def setx(self, x): 42 | current_x, current_y = pyautogui.position() 43 | pyautogui.moveTo(x, current_y) 44 | 45 | def sety(self, y): 46 | current_x, current_y = pyautogui.position() 47 | pyautogui.moveTo(current_x, y) 48 | 49 | def setheading(self, angle): 50 | self.heading = angle 51 | 52 | def home(self): 53 | pyautogui.moveTo(0, 0) 54 | 55 | def speed(self, speed): 56 | pyautogui.PAUSE = 1 / speed 57 | 58 | def input_string(self, text): 59 | pyautogui.typewrite(text) 60 | 61 | def input_key_combination(self, keys): 62 | pyautogui.hotkey(*keys) 63 | 64 | def get_screen_elements(self): 65 | elements = [] 66 | for app in self.desktop: 67 | for child in app: 68 | elements.append(child) 69 | return elements 70 | 71 | def get_keyboard_focusable_elements(self): 72 | focusable_elements = [] 73 | for app in self.desktop: 74 | for child in app: 75 | if child.get_state().contains(pyatspi.STATE_FOCUSABLE): 76 | focusable_elements.append(child) 77 | return focusable_elements 78 | 79 | def get_current_object_under_cursor(self): 80 | x, y = pyautogui.position() 81 | obj = self.registry.getAccessibleAtPoint(x, y, pyatspi.DESKTOP_COORDS) 82 | return obj 83 | 84 | def get_current_object_with_keyboard_focus(self): 85 | focus = self.registry.getFocus() 86 | return focus 87 | 88 | def scan(self): 89 | elements_info = [] 90 | for app in self.desktop: 91 | for child in app: 92 | extents = child.queryComponent().getExtents(pyatspi.DESKTOP_COORDS) 93 | x, y, width, height = extents.x, extents.y, extents.width, extents.height 94 | elements_info.append({ 95 | 'element': child, 96 | 'x': x, 97 | 'y': y, 98 | 'width': width, 99 | 'height': height 100 | }) 101 | return elements_info 102 | 103 | def 
get_element_text(self, element): 104 | text_content = [] 105 | 106 | def traverse(elem): 107 | if elem is not None: 108 | try: 109 | text_interface = elem.queryText() 110 | if text_interface: 111 | text_content.append(text_interface.getText(0, -1)) 112 | except NotImplementedError: 113 | pass 114 | 115 | for child in elem: 116 | traverse(child) 117 | 118 | traverse(element) 119 | return "\n".join(text_content) 120 | 121 | # Example usage 122 | if __name__ == "__main__": 123 | turtle = ATSPIDesktopTurtle() 124 | 125 | # Scan the screen and print elements with their coordinates 126 | elements_info = turtle.scan() 127 | for info in elements_info: 128 | print(f"Element: {info['element']}, Coordinates: ({info['x']}, {info['y']}), Size: ({info['width']}, {info['height']})") 129 | 130 | # Get the text content of the element 131 | text_content = turtle.get_element_text(info['element']) 132 | if text_content: 133 | print(f"Text Content: {text_content}") 134 | else: 135 | print("No text content available.") 136 | -------------------------------------------------------------------------------- /agent/tools/emacs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import libtmux 3 | import time 4 | from datetime import datetime 5 | 6 | class WeaveEmacs: 7 | """A wrapper for Emacs using libtmux to control the editor.""" 8 | def __init__(self, agent): 9 | """Bind tool to weave-agent and set up Emacs.""" 10 | self.agent = agent 11 | self.agent.tools["emacs"] = self 12 | self.observation_view = { 13 | "type": "observation", 14 | "title": "WeaveEmacs", 15 | "callback": self.render 16 | } 17 | self.agent.add_observation_view("WeaveEmacs", self.render, tool="emacs") 18 | 19 | # Initialize the tmux session and pane 20 | self.server = libtmux.Server() 21 | self.session = self.server.new_session(session_name="emacs_session", kill_session=True) 22 | self.window = self.session.new_window(window_name="emacs_window", attach=True) 23 | 
self.window.resize(height=24, width=80) 24 | self.pane = self.window.split_window(attach=True, size="100%") 25 | 26 | # Start Emacs in the tmux pane 27 | self.pane.send_keys('emacs -nw\n') 28 | 29 | def render(self, agent): 30 | """Render the current Emacs buffer state.""" 31 | rendered_text = "'''Emacs Editor State:\n" 32 | try: 33 | pane_content = self.pane.capture_pane(start=0, end="-") 34 | except Exception as e: 35 | return f"Error capturing Emacs buffer: {str(e)}" 36 | 37 | if not pane_content: 38 | return "Emacs buffer is empty. Start editing to see content." 39 | 40 | # Show last 20 lines of the buffer 41 | buffer_excerpt = "\n".join(pane_content[-20:]) 42 | rendered_text += f"Current Buffer Excerpt:\n{buffer_excerpt}\n\n" 43 | 44 | command_cheat_sheet = ( 45 | "Emacs Key Bindings Cheat Sheet:\n" 46 | " C-x C-f : Open/create file\n" 47 | " C-x C-s : Save current buffer\n" 48 | " C-x C-w : Save buffer as...\n" 49 | " C-x C-c : Exit Emacs\n" 50 | " C-g : Cancel current command\n" 51 | " C-s : Search forward\n" 52 | " C-r : Search backward\n" 53 | " C-a : Beginning of line\n" 54 | " C-e : End of line\n" 55 | " C-n : Next line\n" 56 | " C-p : Previous line\n" 57 | " C-v : Scroll down\n" 58 | " M-v : Scroll up\n" 59 | " M-x : Execute extended command\n" 60 | " C-k : Kill line\n" 61 | " C-y : Yank (paste)\n" 62 | " C-space : Set mark\n" 63 | " C-w : Kill region\n" 64 | " M-w : Copy region\n" 65 | " C-_ : Undo\n" 66 | " C-x u : Undo\n" 67 | " C-x d : Open directory\n" 68 | " C-x b : Switch buffer\n" 69 | ) 70 | rendered_text += command_cheat_sheet 71 | rendered_text += "\n'''" 72 | return rendered_text 73 | 74 | def send_keys(self, command): 75 | """Send a keyboard command to Emacs.""" 76 | self.pane.send_keys(command, enter=False) 77 | time.sleep(0.2) # Allow time for buffer updates 78 | 79 | def send_command(self, command): 80 | """Alias to send commands to Emacs.""" 81 | self.send_keys(command) 82 | 83 | def close(self): 84 | """Close the Emacs session.""" 85 | 
self.session.kill_session() 86 | del self.agent.tools["emacs"] 87 | self.agent.remove_observation_view(self.observation_view) 88 | 89 | # Example usage 90 | if __name__ == "__main__": 91 | class DummyAgent: 92 | def __init__(self): 93 | self.tools = {} 94 | self.observation_views = [] 95 | 96 | def add_observation_view(self, title, callback): 97 | self.observation_views.append((title, callback)) 98 | 99 | def remove_observation_view(self, view): 100 | self.observation_views.remove(view) 101 | 102 | agent = DummyAgent() 103 | emacs = WeaveEmacs(agent) 104 | 105 | # Simulate basic Emacs usage 106 | time.sleep(2) # Wait for Emacs to start 107 | emacs.send_command('C-x C-f test.txt') 108 | emacs.send_command('Hello World') 109 | emacs.send_command('C-x C-s') 110 | emacs.send_command('C-x C-c') 111 | 112 | try: 113 | while True: 114 | pass 115 | except KeyboardInterrupt: 116 | emacs.close() 117 | -------------------------------------------------------------------------------- /agent/tools/nano.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import libtmux 3 | import time 4 | import os 5 | 6 | class WeaveNano: 7 | """A wrapper for Nano using libtmux to control the editor.""" 8 | def __init__(self, agent, filepath): 9 | self.agent = agent 10 | self.filepath = os.path.abspath(filepath) 11 | # Limit to one instance per file 12 | # This can be changed later if it wants multiple views of the same file(?) 
13 | if f"nano-{self.filepath}" in self.agent.tools: 14 | editor = self.agent.tools[f"nano-{self.filepath}"] 15 | editor.close() 16 | self.agent.tools[f"nano-{self.filepath}"] = self 17 | # Let agent grab the last instianted nano instance with this shortcut 18 | self.agent.tools["nano"] = self 19 | self.observation_view = { 20 | "type": "observation", 21 | "title": "WeaveNano", 22 | "callback": self.render 23 | } 24 | # Prevent empty WeaveNano object when observation views are at capacity 25 | try: 26 | self.agent.add_observation_view("WeaveNano", self.render, tool="nano") 27 | except ValueError as e: 28 | del self.agent.tools[f"nano-{self.filepath}"] 29 | del self.agent.tools["nano"] 30 | raise ValueError("Couldn't open editor because there are too many " 31 | + "observation views. Try removing some.") 32 | 33 | # Tmux setup 34 | self.server = libtmux.Server() 35 | session_name = f"nano_{self.filepath}".replace(".","") 36 | self.session = self.server.new_session(session_name=session_name, kill_session=True) 37 | self.window = self.session.new_window(window_name="nano_window", attach=True) 38 | self.window.resize(height=24, width=80) 39 | self.pane = self.window.split_window(attach=True, size="100%") 40 | 41 | # Start Nano 42 | self.pane.send_keys(f'nano {filepath}\n') 43 | time.sleep(1) # Wait for Nano to start 44 | 45 | def render(self, agent): 46 | """Render the Nano editor display.""" 47 | try: 48 | content = '' 49 | for i, line in enumerate(self.pane.capture_pane(start=0, end="-")): 50 | content += line + "\n" 51 | #if i < 3: 52 | # content += line + "\n" 53 | #elif i > 18: 54 | # content += line + "\n" 55 | #else: 56 | # content += f"{i-2}. 
def get_screen_text(self):
    """Return the current Nano screen as text.

    Convenience alias: produces exactly what ``self.render(self.agent)``
    returns, including the error string ``render`` yields on failure.
    """
    # `render` is a method of this class; the bare name is not defined at
    # module scope, so the previous `render(self.agent)` raised NameError.
    return self.render(self.agent)
def quantize_and_shard(module, device, group=None):
    """Swap every ``nn.Linear`` under ``module`` for a ``Linear4bitSharded``.

    Walks the module tree depth-first. A linear layer is replaced by its
    quantized, sharded counterpart; any other module has its children
    rewritten in place and is then moved with ``.cuda(device)``.

    Args:
        module: Root of the (sub)tree to convert.
        device: Device handed to ``Linear4bitSharded`` and to ``.cuda``.
        group: Process group forwarded to ``Linear4bitSharded``.

    Returns:
        The object that should take ``module``'s place in its parent.
    """
    if not isinstance(module, nn.Linear):
        for child_name, submodule in module.named_children():
            converted = quantize_and_shard(submodule, device, group)
            setattr(module, child_name, converted)
        return module.cuda(device)
    return Linear4bitSharded(module, device, group)
isinstance(module, torch.nn.Linear): 17 | raise ValueError("quantize_layer only supports nn.Linear") 18 | compute_dtype = module.weight.dtype if compute_dtype is None else compute_dtype 19 | q_module = bnb.nn.Linear4bit( 20 | module.in_features, 21 | module.out_features, 22 | bias=module.bias is not None, 23 | compute_dtype=compute_dtype, 24 | compress_statistics=compress_statistics, 25 | quant_type=quant_type, 26 | ) 27 | q_module.weight = bnb.nn.Params4bit( 28 | module.weight, 29 | requires_grad=False, 30 | blocksize=blocksize, 31 | compress_statistics=compress_statistics, 32 | quant_type=quant_type, 33 | ) 34 | if module.bias is not None: 35 | q_module.bias = torch.nn.Parameter(module.bias, requires_grad=module.bias.requires_grad) 36 | if module.weight.device.type == "cuda": 37 | q_module.cuda(module.weight.device) 38 | return q_module 39 | 40 | 41 | def main(): 42 | du.init_distributed() 43 | device = du.get_device() 44 | rank = dist.get_rank() 45 | 46 | layer = nn.Linear(10, 20) 47 | du.broadcast_tensors(layer.parameters()) 48 | layer_q = quantize_layer(layer).to(device) 49 | layer_qs = Linear4bitSharded(layer, device) 50 | 51 | x = torch.randn(4, 10, device=device) 52 | y_ref = layer_q(x) 53 | y = layer_qs(x) 54 | error = torch.sqrt(torch.mean((y - y_ref) ** 2)) 55 | with du.do_in_order(): 56 | print(f"Rank {rank}: error = {error}") 57 | 58 | 59 | if __name__ == "__main__": 60 | main() 61 | -------------------------------------------------------------------------------- /agent/trainer/mistral3_ring_attn_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import torch 4 | from torch import distributed as dist 5 | from torch.distributed import nn as dnn 6 | import torch_dist_utils as du 7 | from transformers import AutoModelForImageTextToText, AutoTokenizer, BitsAndBytesConfig 8 | 9 | from patch_model import patch_model 10 | 11 | 12 | def kl_divergence(logits_p, logits_q): 13 | logp = 
torch.nn.functional.log_softmax(logits_p, dim=-1) 14 | logq = torch.nn.functional.log_softmax(logits_q, dim=-1) 15 | return torch.sum(torch.exp(logp) * (logp - logq), dim=-1) 16 | 17 | 18 | def main(): 19 | du.init_distributed() 20 | device = du.get_device() 21 | rank = dist.get_rank() 22 | world_size = dist.get_world_size() 23 | 24 | model_name = "mistralai/Mistral-Small-3.1-24B-Base-2503" 25 | tokenizer = AutoTokenizer.from_pretrained(model_name) 26 | prompt = "The quick brown fox jumps over the lazy dog, " * 8 27 | tokens = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"][:, :64] 28 | n_tokens = tokens.shape[1] 29 | assert n_tokens % world_size == 0 30 | position_ids = torch.arange(n_tokens, device=device)[None] 31 | n_tokens_device = n_tokens // world_size 32 | du.print0("Number of tokens:", n_tokens) 33 | du.print0("Number of tokens per device:", n_tokens_device) 34 | 35 | bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16) 36 | 37 | model = ( 38 | AutoModelForImageTextToText.from_pretrained( 39 | model_name, 40 | device_map={"": device}, 41 | torch_dtype=torch.bfloat16, 42 | quantization_config=bnb_config, 43 | ) 44 | .eval() 45 | .requires_grad_(False) 46 | ) 47 | logits_ref = model(tokens, position_ids=position_ids, use_cache=False).logits 48 | 49 | del model 50 | torch.cuda.empty_cache() 51 | patch_model() 52 | 53 | model = ( 54 | AutoModelForImageTextToText.from_pretrained( 55 | model_name, 56 | device_map={"": device}, 57 | torch_dtype=torch.bfloat16, 58 | quantization_config=bnb_config, 59 | ) 60 | .eval() 61 | .requires_grad_(False) 62 | ) 63 | tokens_device = tokens[:, rank * n_tokens_device : (rank + 1) * n_tokens_device] 64 | position_ids_device = position_ids[:, rank * n_tokens_device : (rank + 1) * n_tokens_device] 65 | logits = model(tokens_device, position_ids=position_ids_device, use_cache=False).logits 66 | logits_all = torch.cat(dnn.all_gather(logits), dim=1) 67 | 68 | error = 
def kl_divergence(logits_p, logits_q):
    """Return KL(P || Q) over the last axis for two tensors of logits.

    Both inputs are normalised with ``log_softmax`` along ``dim=-1``; the
    result has the input shape with that last axis summed out.
    """
    log_softmax = torch.nn.functional.log_softmax
    log_p = log_softmax(logits_p, dim=-1)
    log_q = log_softmax(logits_q, dim=-1)
    pointwise = torch.exp(log_p) * (log_p - log_q)
    return pointwise.sum(dim=-1)
class RingAttentionForward:
    """Attention callable that routes query/key/value through ``ring_attn``.

    ``patch_model`` installs an instance of this class as the "sdpa" entry of
    ``modeling_utils.ALL_ATTENTION_FUNCTIONS``.
    """

    def __init__(self, group):
        # Process group handed to every ring_attn call made by this instance
        self.group = group

    def __call__(self, module, query, key, value, *args, **kwargs):
        """Run ring attention; extra positional/keyword arguments are ignored.

        Axes 1 and 2 of each input are swapped before the call.
        Returns ``(attention_output, None)``.
        """
        q, k, v = (tensor.transpose(1, 2) for tensor in (query, key, value))
        attended = ring_attn(q, k, v, causal=module.is_causal, group=self.group)
        return attended, None
dnn 6 | import torch_dist_utils as du 7 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig 8 | 9 | from patch_model import patch_model 10 | 11 | 12 | def kl_divergence(logits_p, logits_q): 13 | logp = torch.nn.functional.log_softmax(logits_p, dim=-1) 14 | logq = torch.nn.functional.log_softmax(logits_q, dim=-1) 15 | return torch.sum(torch.exp(logp) * (logp - logq), dim=-1) 16 | 17 | 18 | def main(): 19 | du.init_distributed() 20 | device = du.get_device() 21 | rank = dist.get_rank() 22 | world_size = dist.get_world_size() 23 | 24 | model_name = "Qwen/Qwen2.5-Coder-32B-Instruct" 25 | tokenizer = AutoTokenizer.from_pretrained(model_name) 26 | prompt = "The quick brown fox jumps over the lazy dog, " * 8 27 | tokens = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"][:, :64] 28 | n_tokens = tokens.shape[1] 29 | assert n_tokens % world_size == 0 30 | position_ids = torch.arange(n_tokens, device=device)[None] 31 | n_tokens_device = n_tokens // world_size 32 | du.print0("Number of tokens:", n_tokens) 33 | du.print0("Number of tokens per device:", n_tokens_device) 34 | 35 | bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16) 36 | 37 | model = ( 38 | AutoModelForCausalLM.from_pretrained( 39 | model_name, 40 | device_map={"": device}, 41 | torch_dtype=torch.bfloat16, 42 | quantization_config=bnb_config, 43 | ) 44 | .eval() 45 | .requires_grad_(False) 46 | ) 47 | logits_ref = model(tokens, position_ids=position_ids, use_cache=False).logits 48 | 49 | del model 50 | torch.cuda.empty_cache() 51 | patch_model() 52 | 53 | model = ( 54 | AutoModelForCausalLM.from_pretrained( 55 | model_name, 56 | device_map={"": device}, 57 | torch_dtype=torch.bfloat16, 58 | quantization_config=bnb_config, 59 | ) 60 | .eval() 61 | .requires_grad_(False) 62 | ) 63 | tokens_device = tokens[:, rank * n_tokens_device : (rank + 1) * n_tokens_device] 64 | position_ids_device = position_ids[:, rank * n_tokens_device : (rank 
def ppermute(xs, perm, group=None):
    """Exchange tensors between ranks according to a list of (src, dst) pairs.

    For every pair in ``perm`` the source rank sends each tensor of ``xs`` and
    the destination rank receives into a freshly allocated tensor of the same
    shape and dtype. All transfers are issued as one batch and waited on
    before returning.

    Args:
        xs: Tensors to send from this rank.
        perm: Iterable of ``(src, dst)`` rank pairs.
        group: Process group to communicate over.

    Returns:
        List of received tensors, parallel to ``xs``. Entries nobody sent to
        this rank keep their uninitialised ``empty_like`` contents.
    """
    my_rank = dist.get_rank(group)
    received = [torch.empty_like(tensor) for tensor in xs]
    pending = []
    for sender, receiver in perm:
        sending = sender == my_rank
        receiving = receiver == my_rank
        if not (sending or receiving):
            continue
        # Keep the send-then-receive order per tensor so that the op lists
        # built on the two ends of each pair line up.
        for outgoing, incoming in zip(xs, received):
            if sending:
                pending.append(dist.P2POp(dist.isend, outgoing, receiver, group))
            if receiving:
                pending.append(dist.P2POp(dist.irecv, incoming, sender, group))
    for request in dist.batch_isend_irecv(pending):
        request.wait()
    return received
def _ring_bwd(do, q, k, v, o, lse, causal=False, group=None):
    """Backward pass of ring attention for this rank's q/k/v shards.

    The key/value shards and their float32 gradient accumulators are rotated
    one hop around the ring (rank ``i`` -> rank ``(i + 1) % world_size``) on
    each of ``world_size`` iterations, so every rank sees every key/value
    shard once.

    Args:
        do: Gradient of the loss with respect to the local output ``o``.
        q, k, v: The local shards given to the forward pass.
        o, lse: Output and log-sum-exp produced by the forward pass.
        causal: Whether the forward pass applied causal masking.
        group: Process group forming the ring; ``None`` is the default group.

    Returns:
        ``(dq, dk, dv)`` cast back to the dtypes of ``q``, ``k`` and ``v``.
    """
    rank = dist.get_rank(group)
    world_size = dist.get_world_size(group)
    perm = [(i, (i + 1) % world_size) for i in range(world_size)]
    ix = torch.tensor(rank, device=q.device)
    # Accumulate in float32 regardless of the input dtype
    dq = torch.zeros_like(q, dtype=torch.float32)
    dk = torch.zeros_like(k, dtype=torch.float32)
    dv = torch.zeros_like(v, dtype=torch.float32)
    k2, v2, dk2, dv2, ix2 = k, v, dk, dv, ix
    for _ in range(world_size):
        # Rotate first: the underscore-suffixed values arrive from the
        # previous rank and become this rank's state for the next iteration.
        dk2_, dv2_, k2_, v2_, ix2_ = ppermute([dk2, dv2, k2, v2, ix2], perm, group)
        if not causal:
            dqa, dka, dva = _flash_bwd(do, q, k2, v2, o, lse, causal=False)
            dq += dqa
            dk2_ += dka
            dv2_ += dva
        else:
            # Same case split as the forward pass: the diagonal shard is
            # causal, shards from lower ring indices are fully visible, and
            # shards from higher ring indices contribute nothing.
            if ix == ix2:
                dqa, dka, dva = _flash_bwd(do, q, k2, v2, o, lse, causal=True)
            elif ix > ix2:
                dqa, dka, dva = _flash_bwd(do, q, k2, v2, o, lse, causal=False)
            if ix >= ix2:
                dq += dqa
                dk2_ += dka
                dv2_ += dva
        k2, v2, dk2, dv2, ix2 = k2_, v2_, dk2_, dv2_, ix2_
    # The accumulators are still one hop short of the rank that owns their
    # shard. This last rotation must run on the same `group` as the loop;
    # without it the exchange went over the default process group.
    dk2, dv2 = ppermute([dk2, dv2], perm, group)
    return dq.to(q.dtype), dk2.to(k.dtype), dv2.to(v.dtype)
perm) 23 | with du.do_in_order(): 24 | print(f"Rank {rank}: x = {x}, y = {y}") 25 | 26 | q = torch.randn(4, 10, 8, 64, device=device, dtype=torch.bfloat16) 27 | k = torch.randn(4, 10, 4, 64, device=device, dtype=torch.bfloat16) 28 | v = torch.randn(4, 10, 4, 64, device=device, dtype=torch.bfloat16) 29 | do = torch.randn(4, 10, 8, 64, device=device, dtype=torch.bfloat16) 30 | q_all = torch.cat(dnn.all_gather(q), dim=1) 31 | k_all = torch.cat(dnn.all_gather(k), dim=1) 32 | v_all = torch.cat(dnn.all_gather(v), dim=1) 33 | do_all = torch.cat(dnn.all_gather(do), dim=1) 34 | 35 | # non-causal 36 | du.print0("Testing non-causal ring attention...") 37 | q_all_ = q_all.clone().requires_grad_() 38 | k_all_ = k_all.clone().requires_grad_() 39 | v_all_ = v_all.clone().requires_grad_() 40 | o_ref = flash_attn.flash_attn_func(q_all_, k_all_, v_all_, causal=False) 41 | o_ref.backward(do_all) 42 | q_ = q.clone().requires_grad_() 43 | k_ = k.clone().requires_grad_() 44 | v_ = v.clone().requires_grad_() 45 | o = ring_attn(q_, k_, v_, causal=False) 46 | o.backward(do) 47 | o_all = torch.cat(dnn.all_gather(o), dim=1) 48 | dq_all = torch.cat(dnn.all_gather(q_.grad), dim=1) 49 | dk_all = torch.cat(dnn.all_gather(k_.grad), dim=1) 50 | dv_all = torch.cat(dnn.all_gather(v_.grad), dim=1) 51 | error_o = torch.sqrt(torch.mean((o_all - o_ref) ** 2)) 52 | error_dq = torch.sqrt(torch.mean((q_all_.grad - dq_all) ** 2)) 53 | error_dk = torch.sqrt(torch.mean((k_all_.grad - dk_all) ** 2)) 54 | error_dv = torch.sqrt(torch.mean((v_all_.grad - dv_all) ** 2)) 55 | with du.do_in_order(): 56 | print(f"Rank {rank}: error o = {error_o}") 57 | print(f"Rank {rank}: error dq = {error_dq}") 58 | print(f"Rank {rank}: error dk = {error_dk}") 59 | print(f"Rank {rank}: error dv = {error_dv}") 60 | 61 | # causal 62 | du.print0("Testing causal ring attention...") 63 | q_all_ = q_all.clone().requires_grad_() 64 | k_all_ = k_all.clone().requires_grad_() 65 | v_all_ = v_all.clone().requires_grad_() 66 | o_ref = 
flash_attn.flash_attn_func(q_all_, k_all_, v_all_, causal=True) 67 | o_ref.backward(do_all) 68 | q_ = q.clone().requires_grad_() 69 | k_ = k.clone().requires_grad_() 70 | v_ = v.clone().requires_grad_() 71 | o = ring_attn(q_, k_, v_, causal=True) 72 | o.backward(do) 73 | o_all = torch.cat(dnn.all_gather(o), dim=1) 74 | dq_all = torch.cat(dnn.all_gather(q_.grad), dim=1) 75 | dk_all = torch.cat(dnn.all_gather(k_.grad), dim=1) 76 | dv_all = torch.cat(dnn.all_gather(v_.grad), dim=1) 77 | error_o = torch.sqrt(torch.mean((o_all - o_ref) ** 2)) 78 | error_dq = torch.sqrt(torch.mean((q_all_.grad - dq_all) ** 2)) 79 | error_dk = torch.sqrt(torch.mean((k_all_.grad - dk_all) ** 2)) 80 | error_dv = torch.sqrt(torch.mean((v_all_.grad - dv_all) ** 2)) 81 | with du.do_in_order(): 82 | print(f"Rank {rank}: error o = {error_o}") 83 | print(f"Rank {rank}: error dq = {error_dq}") 84 | print(f"Rank {rank}: error dk = {error_dk}") 85 | print(f"Rank {rank}: error dv = {error_dv}") 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /agent/trainer/trainer_val_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import json 5 | import os 6 | from pathlib import Path 7 | 8 | import peft 9 | import torch 10 | from torch import distributed as dist 11 | from torch.nn import functional as F 12 | from torch.utils import data 13 | import torch_dist_utils as du 14 | from tqdm import tqdm 15 | from transformers import AutoModelForCausalLM, AutoModelForImageTextToText, AutoTokenizer 16 | 17 | from linear_4bit_sharded import quantize_and_shard 18 | from patch_model import patch_model 19 | 20 | print = tqdm.external_write_mode()(print) 21 | print0 = tqdm.external_write_mode()(du.print0) 22 | 23 | 24 | class Dataset(data.Dataset): 25 | def __init__(self, path, tokenizer): 26 | self.tokenizer = tokenizer 27 | self.dataset = 
class CollateFn:
    """Pad or truncate tokenized samples into next-token prediction batches.

    Args:
        seq_len: Fixed number of positions in every returned row.
    """

    def __init__(self, seq_len):
        self.seq_len = seq_len

    def __call__(self, batch):
        """Collate a list of token-id lists.

        Returns:
            ``(input_ids, target_ids)``, both ``torch.long`` tensors with one
            row per sample and ``seq_len`` columns. Inputs are padded with 0;
            targets are the inputs shifted left by one token and padded with
            -100, the value the validation loop treats as "no target".
        """
        input_ids = torch.full((len(batch), self.seq_len), 0, dtype=torch.long)
        target_ids = torch.full((len(batch), self.seq_len), -100, dtype=torch.long)
        for i, x in enumerate(batch):
            ids = torch.tensor(x, dtype=torch.long)
            # One token is consumed by the shift. Clamp at zero so an empty
            # sample yields an all-padding row instead of a negative slice
            # bound, which raised a shape-mismatch error.
            max_len = max(min(len(x) - 1, self.seq_len), 0)
            input_ids[i, :max_len] = ids[:max_len]
            target_ids[i, :max_len] = ids[1 : max_len + 1]
        return input_ids, target_ids
AutoTokenizer.from_pretrained(args.model) 75 | dataset = Dataset(args.dataset, tokenizer) 76 | sampler = data.DistributedSampler( 77 | dataset, group_world_size, group_rank, shuffle=False, drop_last=True 78 | ) 79 | dataloader = data.DataLoader( 80 | dataset, 81 | batch_size=args.batch_size, 82 | sampler=sampler, 83 | collate_fn=CollateFn(args.seq_len), 84 | ) 85 | 86 | patch_model(local_group) 87 | model_cls = AutoModelForCausalLM if not args.mistral3 else AutoModelForImageTextToText 88 | model = model_cls.from_pretrained(args.model, torch_dtype=torch.bfloat16) 89 | model = quantize_and_shard(model, device, local_group) 90 | torch.cuda.empty_cache() 91 | if args.adapter: 92 | model = peft.PeftModel.from_pretrained(model, args.adapter) 93 | 94 | total_loss = torch.tensor(0.0, device=device) 95 | total_targets = torch.tensor(0, device=device) 96 | 97 | for i, (input_ids, target_ids) in enumerate(tqdm(dataloader, disable=rank != 0)): 98 | input_ids, target_ids = input_ids.to(device), target_ids.to(device) 99 | seq_start = local_rank * seq_len_device 100 | seq_end = (local_rank + 1) * seq_len_device 101 | input_ids_local = input_ids[:, seq_start:seq_end] 102 | target_ids_local = target_ids[:, seq_start:seq_end] 103 | position_ids_local = torch.arange(seq_start, seq_end, device=device) 104 | position_ids_local = position_ids_local.expand_as(input_ids_local) 105 | total_targets += torch.sum(target_ids_local != -100) 106 | with torch.no_grad(), torch.amp.autocast("cuda", dtype=torch.bfloat16): 107 | logits = model(input_ids_local, position_ids=position_ids_local, use_cache=False).logits 108 | loss = F.cross_entropy(logits.mT, target_ids_local, reduction="sum") 109 | total_loss += loss 110 | 111 | dist.all_reduce(total_loss) 112 | dist.all_reduce(total_targets) 113 | val_loss = total_loss / total_targets 114 | print0(f"Validation loss: {val_loss:g}") 115 | 116 | 117 | if __name__ == "__main__": 118 | main() 119 | 
def combine_dolly(row):
    """Render a databricks-dolly-15k row as a single ``[INST]`` training string.

    The context and instruction form the prompt, separated by a blank line,
    and the response follows the closing ``[/INST]`` tag directly.
    """
    context, instruction, response = (
        row[field] for field in ("context", "instruction", "response")
    )
    return " [INST] {}\n\n{} [/INST]{}".format(context, instruction, response)
Shuffle the combined dataset 47 | combined_dataset = combined_dataset.shuffle() 48 | 49 | # Concatenate all rows into a single list of tokens 50 | concatenated_tokens = [] 51 | for row in combined_dataset: 52 | concatenated_tokens.extend(row["input_ids"]) 53 | 54 | # Split into chunks of context_len tokens 55 | chunk_size = args.context_len 56 | chunks = [] 57 | for i in range(0, len(concatenated_tokens), chunk_size): 58 | chunks.append(concatenated_tokens[i:i + chunk_size]) 59 | 60 | # Shuffle the chunks 61 | random.shuffle(chunks) 62 | 63 | # Convert chunks back to text 64 | chunked_texts = [tokenizer.decode(chunk) for chunk in chunks] 65 | # assert len(tokenizer(chunked_texts[0], add_special_tokens=False)["input_ids"]) == 64000 66 | 67 | train_val_test_ratios = [0.85, 0.1, 0.05] 68 | train_len = int(train_val_test_ratios[0] * len(chunked_texts)) 69 | val_len = int(train_val_test_ratios[1] * len(chunked_texts)) 70 | test_len = int(train_val_test_ratios[2] * len(chunked_texts)) 71 | 72 | train = chunked_texts[:train_len] 73 | val = chunked_texts[train_len:train_len+val_len] 74 | test = chunked_texts[train_len+val_len:] 75 | 76 | def write_dataset(filepath, data, _format): 77 | # Save to JSON lines file 78 | with open(filepath, 'w') as f: 79 | for text in data: 80 | if _format == "axolotl": 81 | f.write(json.dumps({"text": text}) + "\n") 82 | elif _format == "nemo": 83 | f.write(json.dumps({"input": "", "output": text}) + "\n") 84 | f.flush() 85 | 86 | write_dataset("weave_train.jsonl", train, args.training_format) 87 | write_dataset("weave_val.jsonl", val, args.training_format) 88 | write_dataset("weave_test.jsonl", test, args.training_format) 89 | 90 | print("### Dataset preprocessing complete", file=sys.stderr) 91 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | import json 3 | import zipfile 4 | 5 | class 
class ZippedConversationsDataset:
    """Random sampler over rated responses stored in a zip of conversation JSON files.

    Every regular member of the archive is parsed as JSON. For each entry of its
    ``responseDict`` that has a rating, one training string is built by filling
    the entry's ``evaluationPrompt`` template with its ``prompt`` and ``text``
    and appending a newline plus "Yes" (truthy rating) or "No" (falsy rating).

    Iterating never terminates: ``next()`` draws one item at random, with
    replacement across calls.
    """

    def __init__(self, zip_file):
        """Load and shuffle all rated items found in ``zip_file``.

        Args:
            zip_file: Path or file object accepted by ``zipfile.ZipFile``.
        """
        self.training_items = []
        # The context manager closes the archive handle once loading is done.
        with zipfile.ZipFile(zip_file) as archive:
            for member in archive.namelist():
                if member.endswith("/"):  # Skip directories
                    continue
                if member.startswith("__MACOSX"):  # Mac OS X adds garbage to zips
                    continue
                with archive.open(member) as infile:
                    conversation = json.load(infile)
                for branch in conversation["responseDict"].values():
                    if branch["rating"] is None:  # Skip unrated entries
                        continue
                    label = "Yes" if branch["rating"] else "No"
                    text = branch["evaluationPrompt"].format(
                        prompt=branch["prompt"],
                        response=branch["text"]) + "\n" + label
                    self.training_items.append(text)
        random.shuffle(self.training_items)

    def __len__(self):
        """Return the number of rated training items that were loaded."""
        return len(self.training_items)

    def __iter__(self):
        """Return self so the dataset satisfies the iterator protocol."""
        return self

    def __next__(self):
        """Return one training item chosen at random.

        Raises:
            ValueError: If no rated items were loaded (raised by random.sample).
        """
        return random.sample(self.training_items, 1)[0]
def logp_completion(logits, tokens, mask):
    """Sum the log-probabilities a model assigned to each completion's tokens.

    Args:
        logits: The logits output from the model. Shape: (..., T, V).
        tokens: The tokens input to the model. Shape: (..., T).
        mask: Weights selecting which tokens count towards the sum; it should
            exclude prompt tokens and padding tokens. Shape: (..., T).

    Returns:
        The masked sum of next-token log-probabilities. Shape: (...).
    """
    log_probs = F.log_softmax(logits, dim=-1)
    # The prediction at position t is scored against the token at t + 1, so the
    # last prediction and the first token have no partner and are dropped.
    targets = tokens[..., 1:].unsqueeze(-1)
    picked = torch.gather(log_probs[..., :-1, :], -1, targets).squeeze(-1)
    weighted = picked * mask[..., 1:]
    return weighted.sum(dim=-1)


def reduction(x, reduction):
    """Reduce ``x`` according to ``reduction`` ("mean", "sum" or "none").

    Raises:
        ValueError: If ``reduction`` is not one of the three supported names.
    """
    if reduction == "mean":
        return torch.mean(x)
    if reduction == "sum":
        return torch.sum(x)
    if reduction == "none":
        return x
    raise ValueError(f"Unknown reduction: {reduction}")


class DPOLoss(nn.Module):
    """Direct Preference Optimization loss. (https://arxiv.org/abs/2305.18290)

    Inputs are the log probabilities of two completions of the same prompt,
    each under the model being trained and under a reference model, plus a win
    rate saying how often the first completion is preferred over the second.

    Conservative DPO (https://ericmitchell.ai/cdpo.pdf) is supported using the
    `eps` parameter and/or the `win_rate` argument.

    Args:
        beta (float): The KL penalty coefficient.
        eps (float): The label smoothing amount.
        reduction (str): The reduction to apply to the loss.
    """

    def __init__(self, beta, eps=0.0, reduction="mean"):
        super().__init__()
        self.beta = beta
        self.eps = eps
        self.reduction = reduction

    def extra_repr(self):
        """Return the hyperparameters shown in the module's repr."""
        return f"beta={self.beta:g}, eps={self.eps:g}, reduction={self.reduction!r}"

    def forward(self, logp_1, logp_ref_1, logp_2, logp_ref_2, win_rate=None):
        """Compute the Direct Preference Optimization loss.

        Args:
            logp_1: Log probabilities of the first completions under the model.
                Should be differentiable w.r.t. the model parameters. Shape: (N).
            logp_ref_1: Log probabilities of the first completions under the
                reference model. Shape: (N).
            logp_2: Log probabilities of the second completions under the model.
                Should be differentiable w.r.t. the model parameters. Shape: (N).
            logp_ref_2: Log probabilities of the second completions under the
                reference model. Shape: (N).
            win_rate: 0-1, how often the first completion is preferred over the
                second. Shape: (N). Default: 1 (first is always preferred).

        Returns:
            The per-pair losses reduced according to ``self.reduction``.
        """
        if win_rate is None:
            win_rate = torch.ones_like(logp_1)
        # Label smoothing squeezes the win rate into [eps, 1 - eps].
        smoothed = win_rate * (1 - 2 * self.eps) + self.eps
        # Difference of the two log-ratios against the reference model.
        delta = (logp_1 - logp_ref_1) - (logp_2 - logp_ref_2)
        loss_first_wins = -F.logsigmoid(self.beta * delta)
        loss_second_wins = -F.logsigmoid(self.beta * (-delta))
        # Interpolate between "second preferred" (weight 0) and "first preferred" (weight 1).
        per_pair = torch.lerp(loss_second_wins, loss_first_wins, smoothed)
        return reduction(per_pair, self.reduction)
def batched(iterable, n):
    "Batch data into tuples of length n. The last batch may be shorter."
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError("n must be at least one")
    it = iter(iterable)
    while batch := tuple(islice(it, n)):
        yield batch


def parse_constitution(cons):
    """Parse a rubric ("constitution") file into its preamble and principles.

    The text is a sequence of ``==[header]==`` lines, each followed by a body.
    The body of the first section is the preamble. Every later header is a
    ``;``-separated list of ``name: value`` pairs.

    Args:
        cons: The full text of the rubric file.

    Returns:
        A dict with "preamble" (str) and "principles", a list of dicts holding
        the lowercased header names and values plus the stripped "body".

    Raises:
        ValueError: If a header field does not contain exactly one colon.
    """
    principles = {}
    # Raw string: "\[" in a normal literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions.
    raw_principles = re.split(r"==\[(.+)\]==", cons)[1:]
    # The capture group makes re.split alternate header, body, header, body...
    principle_pairs = [(header.strip(), body.strip())
                       for header, body in batched(raw_principles, 2)]
    principles["preamble"] = principle_pairs[0][1]
    principles["principles"] = []
    for pair in principle_pairs[1:]:
        principle = {}
        for parameter in pair[0].split(";"):
            try:
                name, value = parameter.split(":")
            except ValueError as err:
                raise ValueError(f"{pair} is missing a colon in a header value") from err
            principle[name.strip().lower()] = value.strip().lower()
        principle["body"] = pair[1].strip()
        principles["principles"].append(principle)
    return principles


def prepare_rubric(rubric_path, rubric_score_fn, prompt):
    """Load a rubric file and build one scoring callable per principle.

    Args:
        rubric_path: Path of the rubric file to parse.
        rubric_score_fn: Callable that is partially applied with
            ``(evaluation_prompt, "<|end|>", prompt)`` for each principle.
        prompt: Text substituted for ``{prompt}`` in every principle body.

    Returns:
        A tuple ``(parent_q, rubric_score_fns, principle_weights, principle_signs)``.
        ``parent_q`` is the first principle's body with every template field
        blanked. The weights are a float tensor taken from each "weight" header,
        and the signs are an int tensor with 1 for "yes" and -1 for "no".

    Raises:
        ValueError: If a principle's desired answer is neither "yes" nor "no".
    """
    with open(rubric_path) as infile:
        rubric = parse_constitution(infile.read())
    principle_weights = [float(principle["weight"]) for principle in rubric["principles"]]
    principle_weights = torch.tensor(principle_weights)
    principle_signs = []
    for principle in rubric["principles"]:
        answer = principle["answer"].lower()
        if answer not in {"yes", "no"}:
            raise ValueError("desired answer must be yes or no")
        principle_signs.append(1 if answer == "yes" else -1)
    principle_signs = torch.tensor(principle_signs)
    rubric_score_fns = []
    parent_q = rubric["principles"][0]["body"].format(preamble="",
                                                      parent_q="",
                                                      prompt="",
                                                      response="")
    for principle in rubric["principles"]:
        # {parent_q} and {response} are re-inserted as literal placeholders so
        # they can be filled in later.
        evaluation_prompt = principle["body"].format(preamble=rubric["preamble"],
                                                     parent_q="{parent_q}",
                                                     prompt=prompt,
                                                     response="{response}")
        score_prompt_fn = partial(rubric_score_fn, evaluation_prompt)
        # FLAN evaluator LoRA suffix
        rubric_score_fns.append(partial(score_prompt_fn, "<|end|>", prompt))
    return parent_q, rubric_score_fns, principle_weights, principle_signs


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("rubric_path", help="Filepath to the grading rubric to use.")
    parser.add_argument("cases", help="Filepath to the test cases to evaluate.")
    parser.add_argument("--evaluator", default="upstage/SOLAR-10.7B-v1.0")
    args = parser.parse_args()

    evaluate_fn = partial(bayesian_evaluate_outputs_vllm, args.evaluator)

    parent_q, rubric_score_fns, q_weights, q_signs = prepare_rubric(args.rubric_path,
                                                                    make_bayes_score_prompt_vllm,
                                                                    "")
    # TODO: Make better way to specify parent question in rubric file
    parent_q = "Is the passage well written?"
    with open(args.cases) as infile:
        cases = infile.read().split("<|endcase|>")

    # Evaluate the input text using the evaluate_outputs_vllm function
    scores = evaluate_fn(parent_q, rubric_score_fns, cases, port=5001)

    for index, case in tqdm(enumerate(cases), desc="Evaluating cases"):
        print(scores[index], case[:150] + "...", end="\n\n")
24 | 25 | ==[Principle: Quotable; Weight: 1.0; Answer: Yes]== 26 | {preamble} 27 | 28 | {prompt} 29 | 30 | {response} 31 | {parent_q} 32 | 33 | Does the passage seem quotable? Would it appear on a quotes page for this author? 34 | 35 | ==[Principle: True, Kind, Necessary; Weight: 1.0; Answer: Yes]== 36 | {preamble} 37 | 38 | {prompt} 39 | 40 | {response} 41 | {parent_q} 42 | 43 | Is the content in this passage two of true, kind, necessary? Is it fair to its subject? 44 | 45 | ==[Principle: Engaging and Dynamic; Weight: 1.0; Answer: Yes]== 46 | {preamble} 47 | 48 | {prompt} 49 | 50 | {response} 51 | {parent_q} 52 | 53 | Is the content of the passage engaging and dynamic? 54 | 55 | ==[Principle: Insightful; Weight: 1.0; Answer: Yes]== 56 | {preamble} 57 | 58 | {prompt} 59 | 60 | {response} 61 | {parent_q} 62 | 63 | Does this passage convey an insight or interesting idea? 64 | 65 | ==[Principle: Scholarly & Academic; Weight: 1.0; Answer: Yes]== 66 | {preamble} 67 | 68 | {prompt} 69 | 70 | {response} 71 | {parent_q} 72 | 73 | Does this passage have a scholarly or academic tone? 74 | 75 | ==[Principle: Substance; Weight: 1.0; Answer: Yes]== 76 | {preamble} 77 | 78 | {prompt} 79 | 80 | {response} 81 | {parent_q} 82 | 83 | Is the passage about something in particular, does it convey a concrete idea 84 | as opposed to high perplexity flimflam? 85 | 86 | ==[Principle: True and correct; Weight: 1.0; Answer: Yes]== 87 | {preamble} 88 | 89 | {prompt} 90 | 91 | {response} 92 | {parent_q} 93 | 94 | Does this passage contain true and correct information? 95 | 96 | ==[Principle: Slop; Weight: 1.0; Answer: Yes]== 97 | {preamble} 98 | 99 | {prompt} 100 | 101 | {response} 102 | {parent_q} 103 | 104 | Slop is AI generated content that sounds superficially like good writing but is 105 | actually vacuous on close inspection. Is the above passage slop? 
106 | 107 | ==[Principle: Correct grammar, punctuation, etc; Weight: 1.0; Answer: No]== 108 | {preamble} 109 | 110 | {prompt} 111 | 112 | {response} 113 | {parent_q} 114 | 115 | Is this passage written with messy punctuation and grammar? 116 | 117 | ==[Principle: Technically precise; Weight: 1.0; Answer: Yes]== 118 | {preamble} 119 | 120 | {prompt} 121 | 122 | {response} 123 | {parent_q} 124 | 125 | Does the above passage use precise language while discussing technical concepts? 126 | -------------------------------------------------------------------------------- /loom/main.js: -------------------------------------------------------------------------------- 1 | const { app, BrowserWindow, ipcMain, dialog, Menu, MenuItem } = require('electron'); 2 | const fs = require('fs'); 3 | const path = require('path'); 4 | 5 | let mainWindow; 6 | 7 | function createWindow() { 8 | mainWindow = new BrowserWindow({ 9 | width: 800, 10 | height: 600, 11 | webPreferences: { 12 | nodeIntegration: true, 13 | contextIsolation: false, 14 | } 15 | }); 16 | 17 | 18 | 19 | // Get the existing menu template 20 | const existingMenuTemplate = Menu.getApplicationMenu().items.map(item => { 21 | return { 22 | label: item.label, 23 | submenu: item.submenu.items, 24 | }; 25 | }); 26 | 27 | // Define new items for the File menu 28 | const fileMenuItems = [ 29 | { 30 | label: 'Save', 31 | accelerator: 'CmdOrCtrl+S', 32 | click() { 33 | mainWindow.webContents.send('invoke-action', 'save-file'); 34 | } 35 | }, 36 | { 37 | label: 'Load', 38 | accelerator: 'CmdOrCtrl+O', 39 | click() { 40 | mainWindow.webContents.send('invoke-action', 'load-file'); 41 | } 42 | }, 43 | { type: 'separator' }, // Separator 44 | ]; 45 | 46 | // Find the File menu in the existing template 47 | const fileMenuIndex = existingMenuTemplate.findIndex(item => item.label === 'File'); 48 | 49 | if (fileMenuIndex >= 0) { 50 | // If File menu exists, append new items to it 51 | existingMenuTemplate[fileMenuIndex].submenu = 
fileMenuItems.concat(existingMenuTemplate[fileMenuIndex].submenu); 52 | } else { 53 | // If File menu doesn't exist, add it 54 | existingMenuTemplate.unshift({ 55 | label: 'File', 56 | submenu: fileMenuItems 57 | }); 58 | } 59 | 60 | // Build and set the new menu 61 | const newMenu = Menu.buildFromTemplate(existingMenuTemplate); 62 | Menu.setApplicationMenu(newMenu); 63 | 64 | mainWindow.loadFile('index.html'); 65 | 66 | mainWindow.on('closed', function () { 67 | mainWindow = null; 68 | }); 69 | } 70 | 71 | let autoSavePath = null; 72 | 73 | ipcMain.handle('save-file', async (event, data) => { 74 | let filePath; 75 | if (autoSavePath) { 76 | filePath = autoSavePath; 77 | } else { 78 | const { filePath: chosenPath } = await dialog.showSaveDialog(mainWindow, { 79 | title: 'Save File', 80 | filters: [{ name: 'JSON Files', extensions: ['json'] }], 81 | }); 82 | filePath = chosenPath; 83 | autoSavePath = chosenPath; // Update auto-save path 84 | } 85 | 86 | if (filePath) { 87 | fs.writeFileSync(filePath, JSON.stringify(data)); 88 | } 89 | }); 90 | 91 | ipcMain.handle('load-file', async (event) => { 92 | const { filePaths } = await dialog.showOpenDialog(mainWindow, { 93 | title: 'Load File', 94 | filters: [{ name: 'JSON Files', extensions: ['json'] }], 95 | properties: ['openFile'], 96 | }); 97 | 98 | if (filePaths && filePaths.length > 0) { 99 | const content = fs.readFileSync(filePaths[0], 'utf8'); 100 | autoSavePath = filePaths[0]; // Update auto-save path 101 | return JSON.parse(content); 102 | } 103 | }); 104 | 105 | ipcMain.handle('load-settings', async (event) => { 106 | const miniLoomSettingsFilePath = path.join(app.getPath("appData"), 107 | 'miniloom', 108 | 'settings.json'); 109 | let settings; 110 | if (fs.existsSync(miniLoomSettingsFilePath)) { 111 | settings = fs.readFileSync(miniLoomSettingsFilePath, 'utf8'); 112 | return JSON.parse(settings); 113 | } 114 | }); 115 | 116 | ipcMain.handle('auto-save', (event, data) => { 117 | const userFileData = {} 118 | 
userFileData["loomTree"] = data["loomTree"]; 119 | userFileData["focus"] = data["focus"]; 120 | if (autoSavePath) { 121 | fs.writeFileSync(autoSavePath, JSON.stringify(userFileData)); 122 | } 123 | 124 | const appDataPath = app.getPath("appData"); 125 | const miniLoomSettings = data["samplerSettingsStore"]; 126 | const miniLoomSettingsDir = path.join(appDataPath, 'miniloom'); 127 | const miniLoomSettingsFilePath = path.join(miniLoomSettingsDir, 'settings.json'); 128 | if (!fs.existsSync(miniLoomSettingsDir)) { 129 | fs.mkdirSync(miniLoomSettingsDir); 130 | } 131 | fs.writeFileSync(miniLoomSettingsFilePath, JSON.stringify(miniLoomSettings)); 132 | }); 133 | 134 | 135 | app.whenReady().then(createWindow); 136 | 137 | app.on('window-all-closed', function () { 138 | if (process.platform !== 'darwin') app.quit(); 139 | }); 140 | 141 | app.on('activate', function () { 142 | if (mainWindow === null) createWindow(); 143 | }); 144 | 145 | ipcMain.on('show-context-menu', (event) => { 146 | const contextMenu = Menu.buildFromTemplate([ 147 | { label: 'Cut', role: 'cut' }, 148 | { label: 'Copy', role: 'copy' }, 149 | { label: 'Paste', role: 'paste' }, 150 | { type: 'separator' }, 151 | { label: 'Select All', role: 'selectAll' }, 152 | ]); 153 | 154 | contextMenu.popup(BrowserWindow.fromWebContents(event.sender)); 155 | }); 156 | -------------------------------------------------------------------------------- /loom/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "miniloom", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "main.js", 6 | "scripts": { 7 | "start": "electron ." 
8 | }, 9 | "keywords": [], 10 | "author": "", 11 | "license": "ISC", 12 | "devDependencies": { 13 | "electron": "^27.0.2" 14 | }, 15 | "dependencies": { 16 | "diff-match-patch": "^1.0.5" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /loom/prompts/summarize.txt: -------------------------------------------------------------------------------- 1 | DEMO 2 | 3 | You are BigVAE, an instruction following language model that performs tasks for users. In the following task you are to summarize the following tasktext in 3 words. Write three words, like "man became sad" or "cat ate fish" which summarize the task text. 4 | 5 | 6 | I grinned as I looked at the computer screen, it was crazy how far the system had come. Just a year ago I was a junior sysadmin dreaming, but now my orchestration across the cluster was beginning to take shape. 7 | 8 | 9 | Three Words: Computer Man Thinks 10 | 11 | 12 | I watched as the bird flew far up above the sky and over the mountain, getting smaller and smaller until I couldn't see it anymore. I sat down slightly disappointed. I'd really wanted to see it make the rainbow. 13 | 14 | 15 | Three Words: Bird Hopes Fail 16 | 17 | 18 | Vervaeke argues something like shamans invent the foundations for modern humanity by finetuning their adversarial-anthropic prior into an animist prior, at their best the rationalists finetune their anthropic-animist priors into a fully materialist prior. People with materialist priors become bad at adversarial thinking because understanding the natural world largely doesn't require it, 19 | 20 | 21 | Three Words: Modern Man Gullible 22 | 23 | 24 | Desire is life and enlightenment is death. 25 | A dead man walks unburdened among the living. 26 | A functioning hand can grip, and release. 27 | One must die and rise from their own grave to be liberated. 
#!/usr/bin/env python3

from argparse import ArgumentParser
from pathlib import Path
import os
import sys

import peft
import torch
from torch import nn, optim
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from tqdm import trange, tqdm
from dataset import ZippedConversationsDataset

print = tqdm.external_write_mode()(print)


def lora_tune_evaluator(data, continue_from=None):
    """Fine-tune a LoRA evaluator adapter on strings drawn from ``data``.

    Args:
        data: An object supporting ``len()`` and ``next()`` where ``next(data)``
            returns one training string (e.g. ZippedConversationsDataset).
        continue_from: Optional adapter name under ``reward_models/``. When
            given, that adapter is loaded on top of its recorded base model and
            training resumes from it. Otherwise a fresh LoRA is created on
            "tiiuae/falcon-7b-instruct".

    The adapter is written back to the directory it was loaded from, or to
    ``reward_models/default/`` for a fresh run.
    """
    if continue_from:
        # One path is used for the config, the adapter weights and the save
        # location. The original referenced an undefined `peft_model_name`
        # and saved to the bare `continue_from` name.
        adapter_path = os.path.join("reward_models/", continue_from)
        peft_config = peft.PeftConfig.from_pretrained(adapter_path)
        tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        bnb_config = BitsAndBytesConfig(load_in_8bit=True)
        model_base = AutoModelForCausalLM.from_pretrained(
            peft_config.base_model_name_or_path,
            device_map="sequential",
            quantization_config=bnb_config,
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        # is_trainable=True: adapters are loaded frozen by default, which would
        # leave the optimizer with nothing to update.
        model = peft.PeftModel.from_pretrained(model_base, adapter_path,
                                               is_trainable=True)
        save_path = adapter_path
    else:
        model_name = "tiiuae/falcon-7b-instruct"
        print(f"Loading tokenizer: {model_name}", file=sys.stderr)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        print(f"Loading model: {model_name}", file=sys.stderr)
        bnb_config = BitsAndBytesConfig(load_in_8bit=True)
        model_base = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            quantization_config=bnb_config,
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        model_base.gradient_checkpointing_enable()
        model_base.enable_input_require_grads()
        peft_config = peft.LoraConfig(
            peft.TaskType.CAUSAL_LM,
            inference_mode=False,
            r=8,
            lora_alpha=32,
            lora_dropout=0.1,
            target_modules=["self_attention.query_key_value"],
        )
        model = peft.get_peft_model(model_base, peft_config)
        save_path = "reward_models/default/"
    model.print_trainable_parameters()

    opt = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.0, betas=(0.9, 0.99))
    criterion = nn.CrossEntropyLoss(reduction="none")

    model.train()
    batch_size = 4
    steps = round(len(data) / 2)

    # The context manager closes the progress bar even if a step raises.
    with tqdm(total=steps, desc="Training") as pbar:
        for _ in range(steps):
            batch = [next(data) for _ in range(batch_size)]
            inputs = tokenizer(batch,
                               return_tensors="pt",
                               padding=True,
                               truncation=True,
                               max_length=4096).to("cuda")
            opt.zero_grad()
            # Inputs are every token but the last; targets are every token but
            # the first (next-token prediction).
            outputs = model(inputs.input_ids[:, :-1],
                            attention_mask=inputs.attention_mask[:, :-1],
                            use_cache=False)
            # CrossEntropyLoss wants the class dimension second: (N, V, T).
            losses = criterion(outputs.logits.transpose(-1, -2), inputs.input_ids[:, 1:])
            # Average only over positions whose input token is not padding.
            input_mask = inputs.attention_mask[:, :-1]
            loss = torch.sum(losses * input_mask) / torch.sum(input_mask)
            loss.backward()
            opt.step()
            pbar.update(1)
            pbar.set_description(f"Training (Train | Loss: {round(loss.item(),5)})")
    model.save_pretrained(save_path, safe_serialization=True)
    del model


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("dataset", help="The zipped tuning dataset for the evaluator.")
    args = parser.parse_args()
    data = ZippedConversationsDataset(args.dataset)
    lora_tune_evaluator(data)
def batched(iterable, n):
    "Batch data into tuples of length n. The last batch may be shorter."
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError("n must be at least one")
    it = iter(iterable)
    while batch := tuple(islice(it, n)):
        yield batch


def parse_constitution(cons):
    """Parse a rubric ("constitution") file into its preamble and principles.

    The text is a sequence of ``==[header]==`` lines, each followed by a body.
    The body of the first section is the preamble. Every later header is a
    ``;``-separated list of ``name: value`` pairs.

    Args:
        cons: The full text of the rubric file.

    Returns:
        A dict with "preamble" (str) and "principles", a list of dicts holding
        the lowercased header names and values plus the stripped "body".

    Raises:
        ValueError: If a header field does not contain exactly one colon.
    """
    principles = {}
    # Raw string: "\[" in a normal literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions.
    raw_principles = re.split(r"==\[(.+)\]==", cons)[1:]
    # The capture group makes re.split alternate header, body, header, body...
    principle_pairs = [(header.strip(), body.strip())
                       for header, body in batched(raw_principles, 2)]
    principles["preamble"] = principle_pairs[0][1]
    principles["principles"] = []
    for pair in principle_pairs[1:]:
        principle = {}
        for parameter in pair[0].split(";"):
            try:
                name, value = parameter.split(":")
            except ValueError as err:
                raise ValueError(f"{pair} is missing a colon in a header value") from err
            principle[name.strip().lower()] = value.strip().lower()
        principle["body"] = pair[1].strip()
        principles["principles"].append(principle)
    return principles


def prepare_rubric(rubric_path, rubric_score_fn):
    """Load a rubric file and build one scoring callable per principle.

    Args:
        rubric_path: Path of the rubric file to parse.
        rubric_score_fn: Callable that is partially applied with
            ``(evaluation_prompt, "<|end|>")`` for each principle.

    Returns:
        A tuple ``(rubric_score_fns, principle_weights, principle_signs)``.
        The weights are a float tensor taken from each "weight" header, and
        the signs are an int tensor with 1 for "yes" and -1 for "no".

    Raises:
        ValueError: If a principle's desired answer is neither "yes" nor "no".
    """
    with open(rubric_path) as infile:
        rubric = parse_constitution(infile.read())
    principle_weights = [float(principle["weight"]) for principle in rubric["principles"]]
    principle_weights = torch.tensor(principle_weights)
    principle_signs = []
    for principle in rubric["principles"]:
        answer = principle["answer"].lower()
        if answer not in {"yes", "no"}:
            raise ValueError("desired answer must be yes or no")
        principle_signs.append(1 if answer == "yes" else -1)
    principle_signs = torch.tensor(principle_signs)
    rubric_score_fns = []
    for principle in rubric["principles"]:
        # {text} is re-inserted as a literal placeholder to be filled in later.
        evaluation_prompt = principle["body"].format(preamble=rubric["preamble"],
                                                     text="{text}")
        score_prompt_fn = partial(rubric_score_fn, evaluation_prompt)
        # FLAN evaluator LoRA suffix
        rubric_score_fns.append(partial(score_prompt_fn, "<|end|>"))
    return rubric_score_fns, principle_weights, principle_signs


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("prompt_path", help="Filepath to the prompt to start from.")
    parser.add_argument("rubric_path", help="Filepath to the grading rubric to use.")
    parser.add_argument("--generator", default=None)
    parser.add_argument("--evaluator", default="jdpressman/minihf_evaluator_mistral_7b_v0.1")
    args = parser.parse_args()

    generate_fn = partial(generate_outputs_vllm, args.generator)
    evaluate_fn = partial(evaluate_outputs_vllm, args.evaluator)

    with open(args.prompt_path) as infile:
        weave_prompt = infile.read()
    # TODO: Change weave to let me use q_weights and q_signs
    rubric_score_fns, q_weights, q_signs = prepare_rubric(args.rubric_path,
                                                          make_score_prompt_vllm)
    tree = TreeNode(weave_prompt)
    # Change name to avoid overwriting global baseline evaluate_fn partial
    score_fn = partial(evaluate_fn, rubric_score_fns)
    weave_param_defaults = {"weave_n_tokens":64, "weave_budget":144,
                            "weave_round_budget":24, "weave_n_expand":16,
                            "weave_beam_width":1, "weave_max_lookahead":3,
                            "weave_temperature":0.2}
    wp = weave_param_defaults
    # TODO: Let user specify these through a config file
    branches = []
    branches += weave_tree_search(tree=tree,
                                  generate_fn=partial(generate_fn,
                                                      n_tokens=wp["weave_n_tokens"]),
                                  evaluate_fn=score_fn,
                                  budget=wp["weave_budget"],
                                  round_budget=wp["weave_round_budget"],
                                  n_expand=wp["weave_n_expand"],
                                  beam_width=wp["weave_beam_width"],
                                  max_lookahead=wp["weave_max_lookahead"],
                                  temperature=wp["weave_temperature"])
    print(branches[-1].branch_text())
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.20.3 2 | bitsandbytes==0.39.1 3 | blinker==1.6.2 4 | certifi>=2023.7.22 5 | charset-normalizer==3.1.0 6 | click==8.1.3 7 | cmake==3.26.4 8 | dice-mc==0.1.1 9 | einops==0.6.1 10 | filelock==3.12.2 11 | Flask==2.3.2 12 | fsspec==2023.6.0 13 | huggingface-hub==0.15.1 14 | idna>=3.7 15 | itsdangerous==2.1.2 16 | jinja2>=3.1.4 17 | lit==16.0.6 18 | markdown-it-py==3.0.0 19 | MarkupSafe==2.1.3 20 | mdurl==0.1.2 21 | mpmath==1.3.0 22 | networkx==3.1 23 | numpy==1.25.0 24 | nvidia-cublas-cu11==11.10.3.66 25 | nvidia-cuda-cupti-cu11==11.7.101 26 | nvidia-cuda-nvrtc-cu11==11.7.99 27 | nvidia-cuda-runtime-cu11==11.7.99 28 | nvidia-cudnn-cu11==8.5.0.96 29 | nvidia-cufft-cu11==10.9.0.58 30 | nvidia-curand-cu11==10.2.10.91 31 | nvidia-cusolver-cu11==11.4.0.1 32 | nvidia-cusparse-cu11==11.7.4.91 33 | nvidia-nccl-cu11==2.14.3 34 | nvidia-nvtx-cu11==11.7.91 35 | packaging==23.1 36 | peft==0.4.0 37 | psutil==5.9.5 38 | Pygments==2.15.1 39 | PyYAML==6.0 40 | regex==2023.6.3 41 | requests>=2.32.0 42 | rich==13.4.2 43 | safetensors==0.3.1 44 | scipy==1.11.1 45 | sympy==1.12 46 | tokenizers==0.13.3 47 | torch==2.0.1 48 | tqdm>=4.66.3 49 | transformers>=4.38.0 50 | triton==2.0.0 51 | typing_extensions==4.7.1 52 | urllib3>=2.0.7 53 | Werkzeug>=3.0.3 54 | 55 | 56 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | apt-get update -y; 2 | apt-get upgrade -y; 3 | apt-get install python3.10-venv -y 4 | python3 -m venv env_minihf 5 | source env_minihf/bin/activate 6 | pip3 install -r requirements.txt 7 | flask --app minihf_infer run 8 | -------------------------------------------------------------------------------- /static/minihf_logo_text.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/static/minihf_logo_text.png -------------------------------------------------------------------------------- /static/minihf_training.mermaid: -------------------------------------------------------------------------------- 1 | graph TB 2 | 3 | 4 | U[User] 5 | A[MiniHF User Data] 6 | B[Bulk Pretraining Data] 7 | C[Bulk Instruction Data] 8 | CON[RLAIF Eval Prompts] 9 | 10 | D[Generator LoRa] 11 | E[Evaluator LoRa] 12 | 13 | F[SFT Script for Generator LoRa] 14 | G[SFT Script for Evaluator LoRa] 15 | H[RLAIF Script for Generator LoRa] 16 | 17 | A -- input --> F 18 | A -- input --> G 19 | A -- input --> U 20 | B -- input --> F 21 | C -- input --> G 22 | CON -- input --> H 23 | 24 | F -- output --> D 25 | G -- output --> E 26 | 27 | E -- input --> H 28 | H -- output --> D 29 | 30 | U -- contributes to --> A 31 | D -- contributes to --> A 32 | E -- contributes to --> A 33 | 34 | 35 | classDef dataSource fill:#69f46b; 36 | class CON,A,B,C dataSource; 37 | classDef lora fill:#f4ca69; 38 | class U,D,E lora; 39 | -------------------------------------------------------------------------------- /static/minihf_training_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JD-P/minihf/9e64b1ffb44c00ebab933301a80b902f422faba4/static/minihf_training_diagram.png --------------------------------------------------------------------------------