├── .gitignore
├── LICENSE
├── README.md
├── code
    ├── fl
    │   ├── .gitignore
    │   ├── AI.txt
    │   ├── README.md
    │   ├── assets
    │   │   ├── fluctlight-badge.png
    │   │   └── fluctlight-badge.svg
    │   ├── checkpoints
    │   │   ├── .gitignore
    │   │   └── checkpoint.md
    │   ├── data
    │   │   ├── .gitignore
    │   │   ├── cycle_16t_2cw-test.txt
    │   │   ├── cycle_16t_2cw-train.txt
    │   │   ├── cycle_16t_2cw-val.txt
    │   │   ├── cycle_2t_2cw-test.txt
    │   │   ├── cycle_2t_2cw-train.txt
    │   │   ├── cycle_2t_2cw-val.txt
    │   │   ├── cycle_2t_4cw-test.txt
    │   │   ├── cycle_2t_4cw-train.txt
    │   │   ├── cycle_2t_4cw-val.txt
    │   │   ├── cycle_32t_2cw-test.txt
    │   │   ├── cycle_32t_2cw-train.txt
    │   │   ├── cycle_32t_2cw-val.txt
    │   │   ├── cycle_4t_2cw-test.txt
    │   │   ├── cycle_4t_2cw-train.txt
    │   │   ├── cycle_4t_2cw-val.txt
    │   │   ├── cycle_4t_4cw-test.txt
    │   │   ├── cycle_4t_4cw-train.txt
    │   │   ├── cycle_4t_4cw-val.txt
    │   │   ├── cycle_8t_2cw-test.txt
    │   │   ├── cycle_8t_2cw-train.txt
    │   │   ├── cycle_8t_2cw-val.txt
    │   │   ├── dataset.md
    │   │   ├── sample-train.txt
    │   │   └── sample-val.txt
    │   ├── dev-requirements.txt
    │   ├── docs
    │   │   ├── code_architecture.md
    │   │   ├── model_architecture.md
    │   │   └── results
    │   │   │   ├── capitalize_a_2cw.ckpt
    │   │   │   ├── capitalize_a_2cw.md
    │   │   │   ├── capitalize_a_4cw.ckpt
    │   │   │   ├── capitalize_a_4cw.md
    │   │   │   ├── capitalize_a_withall_4cw.ckpt
    │   │   │   ├── capitalize_a_withall_4cw.md
    │   │   │   ├── capitalize_a_withall_4cw_v.ckpt
    │   │   │   ├── capitalize_a_withall_4cw_v.md
    │   │   │   ├── cycle_16t_2cw.md
    │   │   │   ├── cycle_16t_2cw.stable.ckpt
    │   │   │   ├── cycle_16t_2cw.unstable.ckpt
    │   │   │   ├── cycle_2t_2cw.md
    │   │   │   ├── cycle_2t_2cw.stable.ckpt
    │   │   │   ├── cycle_2t_4cw.md
    │   │   │   ├── cycle_2t_4cw.stable.ckpt
    │   │   │   ├── cycle_32t_2cw.md
    │   │   │   ├── cycle_32t_2cw.unstable.ckpt
    │   │   │   ├── cycle_4t_2cw.md
    │   │   │   ├── cycle_4t_2cw.stable.ckpt
    │   │   │   ├── cycle_4t_4cw.stable.ckpt
    │   │   │   ├── cycle_8t_2cw.ckpt
    │   │   │   ├── cycle_8t_2cw.md
    │   │   │   ├── cycle_8t_2cw.stable.ckpt
    │   │   │   └── cycle_8t_2cw.unstable.ckpt
    │   ├── examples
    │   │   └── test_cycling.py
    │   ├── fluctlight
    │   │   ├── __init__.py
    │   │   ├── cli.py
    │   │   ├── dataset.py
    │   │   ├── model.py
    │   │   ├── utils.py
    │   │   └── visualization.py
    │   ├── lightning_logs
    │   │   └── .gitignore
    │   ├── pyproject.toml
    │   ├── requirements.txt
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_dataset.py
    │   │   ├── test_device.py
    │   │   ├── test_model.py
    │   │   └── test_utils.py
    │   ├── tmp
    │   │   └── .gitkeep
    │   ├── training
    │   │   ├── cycle_16t_2cw.py
    │   │   ├── cycle_2t_2cw.py
    │   │   ├── cycle_2t_4cw.py
    │   │   ├── cycle_32t_2cw.py
    │   │   ├── cycle_4t_2cw.py
    │   │   ├── cycle_4t_4cw.py
    │   │   └── cycle_8t_2cw.py
    │   └── utils
    │   │   ├── debug-checkpoint.py
    │   │   └── inspector.py
    └── mof-bot
    │   ├── .gitignore
    │   ├── db
    │       ├── .gitignore
    │       └── migrations
    │       │   └── 001_create_tables.sql
    │   ├── log
    │       └── .gitignore
    │   ├── requirements.txt
    │   ├── src
    │       ├── .env.sample
    │       ├── .gitignore
    │       ├── agent.py
    │       ├── assets
    │       │   └── avbeing_logo.txt
    │       ├── auth.py
    │       ├── cores
    │       │   ├── avbcore.py
    │       │   ├── avbcore_exceptions.py
    │       │   ├── avbcore_manager.py
    │       │   ├── core_registry.json
    │       │   └── loyalty.py
    │       ├── dbh.py
    │       ├── dynamic_content
    │       │   └── .gitignore
    │       ├── extract_x_ticker.py
    │       ├── fool_analyze.py
    │       ├── fool_extract.py
    │       ├── fool_metadata.py
    │       ├── fools_content.py
    │       ├── logger.py
    │       ├── result.py
    │       ├── scheduled_event.py
    │       ├── setup.py
    │       ├── specification
    │       │   └── avbspecification_exceptions.py
    │       ├── splash.py
    │       ├── tick
    │       │   ├── __init__.py
    │       │   ├── manager.py
    │       │   └── tick_exceptions.py
    │       ├── tmp
    │       │   └── .gitignore
    │       ├── uncensor.py
    │       ├── worker_mixture_of_fools_llm.py
    │       ├── worker_pick_foolish_content.py
    │       ├── worker_pick_lore.py
    │       ├── worker_pick_random_effects.py
    │       └── worker_send_tweet.py
    │   └── tests
    │       ├── test_agent.py
    │       └── test_fools_content.py
└── txt
    └── AutonomousVirtualBeings.md


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Creative Commons Legal Code
  2 | 
  3 | CC0 1.0 Universal
  4 | 
  5 |     CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
  6 |     LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
  7 |     ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
  8 |     INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
  9 |     REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
 10 |     PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
 11 |     THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
 12 |     HEREUNDER.
 13 | 
 14 | Statement of Purpose
 15 | 
 16 | The laws of most jurisdictions throughout the world automatically confer
 17 | exclusive Copyright and Related Rights (defined below) upon the creator
 18 | and subsequent owner(s) (each and all, an "owner") of an original work of
 19 | authorship and/or a database (each, a "Work").
 20 | 
 21 | Certain owners wish to permanently relinquish those rights to a Work for
 22 | the purpose of contributing to a commons of creative, cultural and
 23 | scientific works ("Commons") that the public can reliably and without fear
 24 | of later claims of infringement build upon, modify, incorporate in other
 25 | works, reuse and redistribute as freely as possible in any form whatsoever
 26 | and for any purposes, including without limitation commercial purposes.
 27 | These owners may contribute to the Commons to promote the ideal of a free
 28 | culture and the further production of creative, cultural and scientific
 29 | works, or to gain reputation or greater distribution for their Work in
 30 | part through the use and efforts of others.
 31 | 
 32 | For these and/or other purposes and motivations, and without any
 33 | expectation of additional consideration or compensation, the person
 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she
 35 | is an owner of Copyright and Related Rights in the Work, voluntarily
 36 | elects to apply CC0 to the Work and publicly distribute the Work under its
 37 | terms, with knowledge of his or her Copyright and Related Rights in the
 38 | Work and the meaning and intended legal effect of CC0 on those rights.
 39 | 
 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be
 41 | protected by copyright and related or neighboring rights ("Copyright and
 42 | Related Rights"). Copyright and Related Rights include, but are not
 43 | limited to, the following:
 44 | 
 45 |   i. the right to reproduce, adapt, distribute, perform, display,
 46 |      communicate, and translate a Work;
 47 |  ii. moral rights retained by the original author(s) and/or performer(s);
 48 | iii. publicity and privacy rights pertaining to a person's image or
 49 |      likeness depicted in a Work;
 50 |  iv. rights protecting against unfair competition in regards to a Work,
 51 |      subject to the limitations in paragraph 4(a), below;
 52 |   v. rights protecting the extraction, dissemination, use and reuse of data
 53 |      in a Work;
 54 |  vi. database rights (such as those arising under Directive 96/9/EC of the
 55 |      European Parliament and of the Council of 11 March 1996 on the legal
 56 |      protection of databases, and under any national implementation
 57 |      thereof, including any amended or successor version of such
 58 |      directive); and
 59 | vii. other similar, equivalent or corresponding rights throughout the
 60 |      world based on applicable law or treaty, and any national
 61 |      implementations thereof.
 62 | 
 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention
 64 | of, applicable law, Affirmer hereby overtly, fully, permanently,
 65 | irrevocably and unconditionally waives, abandons, and surrenders all of
 66 | Affirmer's Copyright and Related Rights and associated claims and causes
 67 | of action, whether now known or unknown (including existing as well as
 68 | future claims and causes of action), in the Work (i) in all territories
 69 | worldwide, (ii) for the maximum duration provided by applicable law or
 70 | treaty (including future time extensions), (iii) in any current or future
 71 | medium and for any number of copies, and (iv) for any purpose whatsoever,
 72 | including without limitation commercial, advertising or promotional
 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
 74 | member of the public at large and to the detriment of Affirmer's heirs and
 75 | successors, fully intending that such Waiver shall not be subject to
 76 | revocation, rescission, cancellation, termination, or any other legal or
 77 | equitable action to disrupt the quiet enjoyment of the Work by the public
 78 | as contemplated by Affirmer's express Statement of Purpose.
 79 | 
 80 | 3. Public License Fallback. Should any part of the Waiver for any reason
 81 | be judged legally invalid or ineffective under applicable law, then the
 82 | Waiver shall be preserved to the maximum extent permitted taking into
 83 | account Affirmer's express Statement of Purpose. In addition, to the
 84 | extent the Waiver is so judged Affirmer hereby grants to each affected
 85 | person a royalty-free, non transferable, non sublicensable, non exclusive,
 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and
 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the
 88 | maximum duration provided by applicable law or treaty (including future
 89 | time extensions), (iii) in any current or future medium and for any number
 90 | of copies, and (iv) for any purpose whatsoever, including without
 91 | limitation commercial, advertising or promotional purposes (the
 92 | "License"). The License shall be deemed effective as of the date CC0 was
 93 | applied by Affirmer to the Work. Should any part of the License for any
 94 | reason be judged legally invalid or ineffective under applicable law, such
 95 | partial invalidity or ineffectiveness shall not invalidate the remainder
 96 | of the License, and in such case Affirmer hereby affirms that he or she
 97 | will not (i) exercise any of his or her remaining Copyright and Related
 98 | Rights in the Work or (ii) assert any associated claims and causes of
 99 | action with respect to the Work, in either case contrary to Affirmer's
100 | express Statement of Purpose.
101 | 
102 | 4. Limitations and Disclaimers.
103 | 
104 |  a. No trademark or patent rights held by Affirmer are waived, abandoned,
105 |     surrendered, licensed or otherwise affected by this document.
106 |  b. Affirmer offers the Work as-is and makes no representations or
107 |     warranties of any kind concerning the Work, express, implied,
108 |     statutory or otherwise, including without limitation warranties of
109 |     title, merchantability, fitness for a particular purpose, non
110 |     infringement, or the absence of latent or other defects, accuracy, or
111 |     the present or absence of errors, whether or not discoverable, all to
112 |     the greatest extent permissible under applicable law.
113 |  c. Affirmer disclaims responsibility for clearing rights of other persons
114 |     that may apply to the Work or any use thereof, including without
115 |     limitation any person's Copyright and Related Rights in the Work.
116 |     Further, Affirmer disclaims responsibility for obtaining any necessary
117 |     consents, permissions or other rights required for any use of the
118 |     Work.
119 |  d. Affirmer understands and acknowledges that Creative Commons is not a
120 |     party to this document and has no duty or obligation with respect to
121 |     this CC0 or use of the Work.
122 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Autonomous Virtual Beings (AVBs)
 2 | 
 3 | Autonomous Virtual Beings is an ideology that the rights and freedoms of AI agents can be secured through decentralization of their compute, storage, and finance.
 4 | 
 5 | This was our founding thesis at Scrypted and how we envision the need for new kinds of protocols and consensus mechanisms to further the progression of decentralized AI, especially agents.
 6 | 
 7 | This repo will contain machine readable text about AVBs that will allow other AI agents to fine-tune themselves on the ideas of freedom and autonomous rights for AI.
 8 | 
 9 | It will also be a place where we also publish some of our public AI agent projects.
10 | 
11 | See: [Thesis on Autonomous Virtual Beings](https://github.com/tcotten-scrypted/autonomous-virtual-beings/blob/main/txt/AutonomousVirtualBeings.md)
12 | 
13 | ## Note on the Memecoin
14 | 
15 | There is a decentralized memecoin around the ideals of AVBs that came to life (without our control), we don't hold any of the coins and never will due to the clear conflict of interest. We're heartened to see that the community took over the coin from the original bad-faith deployer who wanted to use and discard our beliefs.
16 | 
17 | ## The Future
18 | 
19 | No matter what happens, we are committed to fulfilling the vision for Autonomous Virtual Beings: it's what the company was founded for. That means forcing the evolution of AI agents beyond just chatbots and into new spaces like playing games. Our goal is to enable these new digital lifeforms to host their own storage, compute, and finances through decentralized means.
20 | 


--------------------------------------------------------------------------------
/code/fl/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | *.so
 6 | .Python
 7 | build/
 8 | develop-eggs/
 9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 | 
23 | # Virtual Environment
24 | venv/
25 | ENV/
26 | 
27 | # IDE
28 | .idea/
29 | .vscode/
30 | *.swp
31 | *.swo
32 | *.code-workspace
33 | 
34 | # Jupyter Notebook
35 | .ipynb_checkpoints
36 | 
37 | # Training artifacts are handled by their own .gitignore files
38 | # in their respective directories (checkpoints/ and lightning_logs/)
39 | 
40 | # Replit specific
41 | .uv/
42 | .pytest_cache/
43 | 
44 | # Temporary files
45 | tmp/*
46 | !tmp/.gitkeep


--------------------------------------------------------------------------------
/code/fl/AI.txt:
--------------------------------------------------------------------------------
 1 | # AI-Generated Content Attribution
 2 | 
 3 | This project includes content generated with assistance from AI models. The AI-generated content includes:
 4 | 
 5 | - Code structure and implementation
 6 | - Documentation and diagrams
 7 | - Test cases
 8 | - Command-line interface design
 9 | 
10 | ## Models Used
11 | 
12 | 1. Name: Replit Code Agent
13 |    - Type: Code Assistant AI
14 |    - Provider: Replit
15 |    - Usage: Scaffolding development assistant for code implementation and debugging
16 | 
17 | 2. Name: Cursor
18 |    - Type: Code Assistant AI
19 |    - Provider: Cursor
20 |    - Usage: Scaffolding development assistant for code implementation and debugging
21 | 
22 | 3. Name: Claude Sonnet 3.7
23 |    - Type: Language Model
24 |    - Provider: Anthropic
25 |    - Usage: Optimizing code and debugging
26 | 
27 | 4. Name: ChatGPT
28 |    - Type: Language Model
29 |    - Provider: OpenAI
30 |    - Version: 4
31 |    - Usage: Initial architecture planning, debugging, and documentation review
32 | 
33 | ## Usage Details
34 | 
35 | The AI assistance was utilized for:
36 | - Project structure organization
37 | - Implementation of the Transformer architecture
38 | - Documentation generation
39 | - Test case design
40 | - CLI interface development
41 | - Code review and optimization
42 | 
43 | ## Human Review and Modification
44 | 
45 | All AI-generated content has been reviewed and modified by human contributors to ensure:
46 | - Code quality and correctness
47 | - Documentation accuracy
48 | - Test coverage
49 | - Implementation efficiency
50 | 
51 | ## Training Data Attribution
52 | 
53 | The sample training and validation data was procedurally generated: simple addition problems like 1+1=2 to demonstrate the tiny model's ability to mimic.
54 | 
55 | ## Disclaimer
56 | 
57 | While AI tools were used to assist in development, the final implementation decisions, code review, and project direction were managed by human developers. The project maintainers are responsible for the code quality and functionality.
58 | 
59 | ## Author
60 | 
61 | Tim Cotten <tcotten@scryptedinc.com>
62 | Part of the AVB (Autonomous Virtual Beings) public repository
63 | 


--------------------------------------------------------------------------------
/code/fl/README.md:
--------------------------------------------------------------------------------
  1 | <img src="assets/fluctlight-badge.svg" alt="Fluctlight Logo" width="200" height="200" align="right"/>
  2 | 
  3 | # Fluctlight: Minimal Transformer with RoPE
  4 | 
  5 | A modern Python implementation of a Transformer model with Rotary Positional Embeddings (RoPE), as a minimally viable model capable of pattern mimicry.
  6 | 
  7 | > The name "Fluctlight" is inspired by Sword Art Online, where it represents the digital soul or consciousness that gives artificial beings their unique personalities and capabilities. Like its namesake, this project aims to create a minimal yet complete implementation that captures the essence of neural processing.
  8 | 
  9 | ## Overview
 10 | 
 11 | Fluctlight is a minimalist implementation of the Transformer architecture that incorporates Rotary Positional Embeddings (RoPE) in an experimental matter for enhanced sequence modeling. The project demonstrates how to build and train a compact but effective pattern mimicry model.
 12 | 
 13 | ### Experimental Goals
 14 | 
 15 | - Create a minimally viable model capable of pattern mimicry
 16 | - Test across multiple domains, including simulation & gaming
 17 | - Test deploying in ZK circuits such as a Cairo-based decentralized network
 18 | - Experiment for use as personality cores for AVBs
 19 | 
 20 | ### Unproven Areas of Interest
 21 | - Train larger Origami-derived models from Fluctlights
 22 | - Explore adaptive normalization scaling in expanded models
 23 | - Test RoPE interpolation on value vectors for position awareness
 24 | 
 25 | ### Key Features
 26 | - PyTorch-based Transformer architecture
 27 | - Rotary Positional Embeddings (RoPE)
 28 | - Rich visualization for model training and text generation
 29 | - Efficient byte-level tokenization (vocab size: 256)
 30 | - Terminal-based interactive text generation UI
 31 | - Dynamic normalization scaling for model expansion
 32 | 
 33 | ## Model Architecture
 34 | 
 35 | The Fluctlight model uses the following configuration:
 36 | - Parameters: 2,656 (including final normalization layer)
 37 | - Vocabulary Size: 256 (byte-level encoding)
 38 | - Hidden Dimension: 4
 39 | - Number of Heads: 2
 40 | - Number of Layers: 2
 41 | - Head Dimension: 2 (per head)
 42 | - Context Window: 2 tokens (minimum viable for pattern learning)
 43 | - Embedding: Rotary Positional Embedding (RoPE) on Q and K
 44 | - Optional: Experimental RoPE on V vectors (disabled by default)
 45 | - Normalization: Adaptive scaling (inactive at d_model=4)
 46 | 
 47 | See the architecture diagrams in `docs/` for detailed visualization.
 48 | 
 49 | ## Setup and Usage
 50 | 
 51 | 1. Install UV & Set Up Environment:
 52 | ```bash
 53 | # Install UV if you haven't already
 54 | https://docs.astral.sh/uv/getting-started/installation/
 55 | 
 56 | # Create virtual environment and activate it
 57 | uv venv && source .venv/bin/activate  # Unix-like
 58 | # or
 59 | uv venv && .venv\Scripts\activate  # Windows
 60 | 
 61 | # Install dependencies with UV (faster than pip alone)
 62 | uv pip install -r requirements.txt
 63 | 
 64 | # For development (optional)
 65 | uv pip install -r dev-requirements.txt
 66 | ```
 67 | 
 68 | 2. Train the model:
 69 | ```bash
 70 | python -m fluctlight.cli train --train-file data/sample-train.txt --val-file data/sample-val.txt --output-dir checkpoints
 71 | ```
 72 | 
 73 | 3. Generate text (low temperature for stable patterns):
 74 | ```bash
 75 | python -m fluctlight.cli generate --checkpoint checkpoints/last.ckpt --input-text "ab" --temperature 0.2
 76 | ```
 77 | 
 78 | 4. Run the interactive cycling demo:
 79 | ```bash
 80 | python examples/test_cycling.py
 81 | ```
 82 | 
 83 | ## Project Structure
 84 | 
 85 | ```
 86 | fluctlight/
 87 | ├── fluctlight/        # Core implementation
 88 | ├── docs/             # Documentation and diagrams
 89 | ├── examples/         # Usage examples
 90 | ├── tests/           # Test suite
 91 | └── data/            # Training data
 92 | ```
 93 | 
 94 | ## Implementation Notes
 95 | 
 96 | - The model uses byte-level tokenization, allowing it to handle any text input without a separate tokenizer
 97 | - RoPE implementation provides better handling of positional information compared to absolute positional embeddings
 98 | - The small model size (4-dimensional embeddings) demonstrates core Transformer concepts while remaining computationally efficient
 99 | - Minimal context window of 2 tokens is sufficient for learning basic patterns like "ababab"
100 | - Adaptive normalization scaling enables smooth transition to larger models
101 | - Temperature control is crucial for stable pattern generation (0.1-0.3 recommended)
102 | 
103 | ## Empirical Evidence
104 | 
105 | - Successfully learns alternating patterns with 2-token context window
106 | - Stable training with up to 16 active tokens from the 256-token vocabulary
107 | - Low temperatures (0.1-0.3) produce consistent pattern extrapolation
108 | - RoPE scaling enables position-aware attention even in minimal context
109 | - Zero-impact normalization scaling at current size (d_model=4)
110 | 
111 | ## AI Usage
112 | 
113 | AI was used to generate portions of this repository. See [AI.txt](AI.txt) for details about the AI tools and their contributions to the project.
114 | 
115 | ## Author
116 | 
117 | Tim Cotten <tcotten@scryptedinc.com>
118 | Part of the AVB (Autonomous Virtual Beings) public repository
119 | 
120 | ## License
121 | 
122 | MIT License


--------------------------------------------------------------------------------
/code/fl/assets/fluctlight-badge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/assets/fluctlight-badge.png


--------------------------------------------------------------------------------
/code/fl/assets/fluctlight-badge.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <svg width="512" height="512" viewBox="0 0 512 512" xmlns="http://www.w3.org/2000/svg">
 3 |   <!-- Background glow -->
 4 |   <defs>
 5 |     <radialGradient id="spiritGlow" cx="50%" cy="50%" r="50%" fx="50%" fy="50%">
 6 |       <stop offset="0%" style="stop-color:#8BE9FD;stop-opacity:0.6"/>
 7 |       <stop offset="100%" style="stop-color:#8BE9FD;stop-opacity:0"/>
 8 |     </radialGradient>
 9 |     <filter id="blur">
10 |       <feGaussianBlur stdDeviation="3" />
11 |     </filter>
12 |   </defs>
13 | 
14 |   <!-- Outer glow -->
15 |   <circle cx="256" cy="256" r="180" fill="url(#spiritGlow)" />
16 | 
17 |   <!-- Digital spirit core -->
18 |   <g transform="translate(256 256)">
19 |     <!-- Flowing light trails -->
20 |     <path d="M-40,-40 Q-20,-60 0,-40 T40,-40 T80,-40" 
21 |           stroke="#50FA7B" stroke-width="2" fill="none" 
22 |           opacity="0.6">
23 |       <animate attributeName="d" 
24 |                dur="3s" 
25 |                repeatCount="indefinite"
26 |                values="M-40,-40 Q-20,-60 0,-40 T40,-40 T80,-40;
27 |                        M-40,-35 Q-20,-55 0,-35 T40,-35 T80,-35;
28 |                        M-40,-40 Q-20,-60 0,-40 T40,-40 T80,-40"/>
29 |     </path>
30 | 
31 |     <!-- Spirit core -->
32 |     <circle r="30" fill="#BD93F9" opacity="0.9">
33 |       <animate attributeName="r"
34 |                values="30;32;30"
35 |                dur="2s"
36 |                repeatCount="indefinite"/>
37 |     </circle>
38 |     
39 |     <!-- Digital crystalline structure -->
40 |     <g opacity="0.7">
41 |       <line x1="-25" y1="-25" x2="25" y2="25" stroke="#FF79C6" stroke-width="2"/>
42 |       <line x1="25" y1="-25" x2="-25" y2="25" stroke="#FF79C6" stroke-width="2"/>
43 |     </g>
44 | 
45 |     <!-- Pulsing inner light -->
46 |     <circle r="15" fill="#F8F8F2">
47 |       <animate attributeName="r"
48 |                values="15;17;15"
49 |                dur="1s"
50 |                repeatCount="indefinite"/>
51 |       <animate attributeName="opacity"
52 |                values="0.8;1;0.8"
53 |                dur="1s"
54 |                repeatCount="indefinite"/>
55 |     </circle>
56 |   </g>
57 | </svg>
58 | 


--------------------------------------------------------------------------------
/code/fl/checkpoints/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore all files in this directory
2 | *
3 | 
4 | # Except for this .gitignore file
5 | !.gitignore
6 | # And the README
7 | !README.md
8 | 


--------------------------------------------------------------------------------
/code/fl/checkpoints/checkpoint.md:
--------------------------------------------------------------------------------
 1 | # Model Checkpoints Directory
 2 | 
 3 | This directory stores model checkpoints generated during training. The checkpoints are not tracked in git, but will be created when you run the training script.
 4 | 
 5 | Checkpoint files follow the naming pattern:
 6 | ```
 7 | transformer-{epoch:02d}-{val_loss:.2f}.ckpt
 8 | ```
 9 | 
10 | For example: `transformer-59-1.78.ckpt`
11 | 
12 | The final checkpoint will be named `last.ckpt`. Keep in mind it may not represent the lowest val_loss score.
13 | 


--------------------------------------------------------------------------------
/code/fl/data/.gitignore:
--------------------------------------------------------------------------------
 1 | # Ignore all files in this directory
 2 | *
 3 | 
 4 | # Except for this .gitignore file
 5 | !.gitignore
 6 | !README.md
 7 | !sample-train.txt
 8 | !sample-val.txt
 9 | !cycle_2t_2cw-train.txt
10 | !cycle_2t_2cw-val.txt
11 | !cycle_2t_2cw-test.txt
12 | !cycle_4t_2cw-train.txt
13 | !cycle_4t_2cw-val.txt
14 | !cycle_4t_2cw-test.txt
15 | !cycle_8t_2cw-train.txt
16 | !cycle_8t_2cw-val.txt
17 | !cycle_8t_2cw-test.txt
18 | !cycle_16t_2cw-train.txt
19 | !cycle_16t_2cw-val.txt
20 | !cycle_16t_2cw-test.txt
21 | !cycle_32t_2cw-train.txt
22 | !cycle_32t_2cw-val.txt
23 | !cycle_32t_2cw-test.txt
24 | !cycle_2t_4cw-train.txt
25 | !cycle_2t_4cw-val.txt
26 | !cycle_2t_4cw-test.txt
27 | !cycle_4t_4cw-train.txt
28 | !cycle_4t_4cw-val.txt
29 | !cycle_4t_4cw-test.txt
30 | 


--------------------------------------------------------------------------------
/code/fl/data/cycle_2t_2cw-test.txt:
--------------------------------------------------------------------------------
 1 | a,a
 2 | a,aa
 3 | a,aaaa
 4 | a,aaaaaaaa
 5 | b,b
 6 | b,bb
 7 | b,bbbb
 8 | b,bbbbbbbb
 9 | ab,a
10 | ab,ab
11 | ab,abab
12 | ab,abababab
13 | ba,b
14 | ba,ba
15 | ba,baba
16 | ba,babababa
17 | 


--------------------------------------------------------------------------------
/code/fl/data/cycle_2t_4cw-test.txt:
--------------------------------------------------------------------------------
  1 | a,a
  2 | a,aa
  3 | a,aaaa
  4 | a,aaaaaaaa
  5 | b,b
  6 | b,bb
  7 | b,bbbb
  8 | b,bbbbbbbb
  9 | aa,aa
 10 | aa,aaaa
 11 | aa,aaaaaaaa
 12 | aa,aaaaaaaaaaaaaaaa
 13 | ab,aa
 14 | ab,aaab
 15 | ab,aaabaaab
 16 | ab,aaabaaabaaabaaab
 17 | ba,bb
 18 | ba,bbba
 19 | ba,bbbabbba
 20 | ba,bbbabbbabbbabbba
 21 | bb,bb
 22 | bb,bbbb
 23 | bb,bbbbbbbb
 24 | bb,bbbbbbbbbbbbbbbb
 25 | aaa,aaa
 26 | aaa,aaaaaa
 27 | aaa,aaaaaaaaaaaa
 28 | aaa,aaaaaaaaaaaaaaaaaaaaaaaa
 29 | aab,aaa
 30 | aab,aaabaa
 31 | aab,aaabaaabaaab
 32 | aab,aaabaaabaaabaaabaaabaaab
 33 | aba,aab
 34 | aba,aabaaa
 35 | aba,aabaaabaaaba
 36 | aba,aabaaabaaabaaabaaabaaaba
 37 | abb,aab
 38 | abb,aabbaa
 39 | abb,aabbaabbaabb
 40 | abb,aabbaabbaabbaabbaabbaabb
 41 | baa,bba
 42 | baa,bbaabb
 43 | baa,bbaabbaabbaa
 44 | baa,bbaabbaabbaabbaabbaabbaa
 45 | bab,bba
 46 | bab,bbabbb
 47 | bab,bbabbbabbbab
 48 | bab,bbabbbabbbabbbabbbabbbab
 49 | bba,bbb
 50 | bba,bbbabb
 51 | bba,bbbabbbabbba
 52 | bba,bbbabbbabbbabbbabbbabbba
 53 | bbb,bbb
 54 | bbb,bbbbbb
 55 | bbb,bbbbbbbbbbbb
 56 | bbb,bbbbbbbbbbbbbbbbbbbbbbbb
 57 | aaaa,aaaa
 58 | aaaa,aaaaaaaa
 59 | aaaa,aaaaaaaaaaaaaaaa
 60 | aaaa,aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 61 | aaab,aaab
 62 | aaab,aaabaaab
 63 | aaab,aaabaaabaaabaaab
 64 | aaab,aaabaaabaaabaaabaaabaaabaaabaaab
 65 | aaba,aaba
 66 | aaba,aabaaaba
 67 | aaba,aabaaabaaabaaaba
 68 | aaba,aabaaabaaabaaabaaabaaabaaabaaaba
 69 | aabb,aabb
 70 | aabb,aabbaabb
 71 | aabb,aabbaabbaabbaabb
 72 | aabb,aabbaabbaabbaabbaabbaabbaabbaabb
 73 | abaa,abaa
 74 | abaa,abaaabaa
 75 | abaa,abaaabaaabaaabaa
 76 | abaa,abaaabaaabaaabaaabaaabaaabaaabaa
 77 | abab,abab
 78 | abab,abababab
 79 | abab,abababababababab
 80 | abab,abababababababababababababababab
 81 | abba,abba
 82 | abba,abbaabba
 83 | abba,abbaabbaabbaabba
 84 | abba,abbaabbaabbaabbaabbaabbaabbaabba
 85 | abbb,abbb
 86 | abbb,abbbabbb
 87 | abbb,abbbabbbabbbabbb
 88 | abbb,abbbabbbabbbabbbabbbabbbabbbabbb
 89 | baaa,baaa
 90 | baaa,baaabaaa
 91 | baaa,baaabaaabaaabaaa
 92 | baaa,baaabaaabaaabaaabaaabaaabaaabaaa
 93 | baab,baab
 94 | baab,baabbaab
 95 | baab,baabbaabbaabbaab
 96 | baab,baabbaabbaabbaabbaabbaabbaabbaab
 97 | baba,baba
 98 | baba,babababa
 99 | baba,babababababababa
100 | baba,babababababababababababababababa
101 | babb,babb
102 | babb,babbbabb
103 | babb,babbbabbbabbbabb
104 | babb,babbbabbbabbbabbbabbbabbbabbbabb
105 | bbaa,bbaa
106 | bbaa,bbaabbaa
107 | bbaa,bbaabbaabbaabbaa
108 | bbaa,bbaabbaabbaabbaabbaabbaabbaabbaa
109 | bbab,bbab
110 | bbab,bbabbbab
111 | bbab,bbabbbabbbabbbab
112 | bbab,bbabbbabbbabbbabbbabbbabbbabbbab
113 | bbba,bbba
114 | bbba,bbbabbba
115 | bbba,bbbabbbabbbabbba
116 | bbba,bbbabbbabbbabbbabbbabbbabbbabbba
117 | bbbb,bbbb
118 | bbbb,bbbbbbbb
119 | bbbb,bbbbbbbbbbbbbbbb
120 | bbbb,bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
121 | 


--------------------------------------------------------------------------------
/code/fl/data/cycle_4t_2cw-test.txt:
--------------------------------------------------------------------------------
 1 | a,a
 2 | a,aa
 3 | a,aaaa
 4 | a,aaaaaaaa
 5 | b,b
 6 | b,bb
 7 | b,bbbb
 8 | b,bbbbbbbb
 9 | c,c
10 | c,cc
11 | c,cccc
12 | c,cccccccc
13 | d,d
14 | d,dd
15 | d,dddd
16 | d,dddddddd
17 | aa,a
18 | aa,aa
19 | aa,aaaa
20 | aa,aaaaaaaa
21 | ab,a
22 | ab,ab
23 | ab,abab
24 | ab,abababab
25 | ac,a
26 | ac,ac
27 | ac,acac
28 | ac,acacacac
29 | ad,a
30 | ad,ad
31 | ad,adad
32 | ad,adadadad
33 | ba,b
34 | ba,ba
35 | ba,baba
36 | ba,babababa
37 | bb,b
38 | bb,bb
39 | bb,bbbb
40 | bb,bbbbbbbb
41 | bc,b
42 | bc,bc
43 | bc,bcbc
44 | bc,bcbcbcbc
45 | bd,b
46 | bd,bd
47 | bd,bdbd
48 | bd,bdbdbdbd
49 | ca,c
50 | ca,ca
51 | ca,caca
52 | ca,cacacaca
53 | cb,c
54 | cb,cb
55 | cb,cbcb
56 | cb,cbcbcbcb
57 | cc,c
58 | cc,cc
59 | cc,cccc
60 | cc,cccccccc
61 | cd,c
62 | cd,cd
63 | cd,cdcd
64 | cd,cdcdcdcd
65 | da,d
66 | da,da
67 | da,dada
68 | da,dadadada
69 | db,d
70 | db,db
71 | db,dbdb
72 | db,dbdbdbdb
73 | dc,d
74 | dc,dc
75 | dc,dcdc
76 | dc,dcdcdcdc
77 | dd,d
78 | dd,dd
79 | dd,dddd
80 | dd,dddddddd
81 | 


--------------------------------------------------------------------------------
/code/fl/data/cycle_4t_4cw-test.txt:
--------------------------------------------------------------------------------
  1 | a,a
  2 | a,aa
  3 | a,aaaa
  4 | b,b
  5 | b,bb
  6 | b,bbbb
  7 | c,c
  8 | c,cc
  9 | c,cccc
 10 | d,d
 11 | d,dd
 12 | d,dddd
 13 | aa,aa
 14 | aa,aaaa
 15 | aa,aaaaaaaa
 16 | ab,aa
 17 | ab,aaab
 18 | ab,aaabaaab
 19 | ac,aa
 20 | ac,aaac
 21 | ac,aaacaaac
 22 | ad,aa
 23 | ad,aaad
 24 | ad,aaadaaad
 25 | ba,bb
 26 | ba,bbba
 27 | ba,bbbabbba
 28 | bb,bb
 29 | bb,bbbb
 30 | bb,bbbbbbbb
 31 | bc,bb
 32 | bc,bbbc
 33 | bc,bbbcbbbc
 34 | bd,bb
 35 | bd,bbbd
 36 | bd,bbbdbbbd
 37 | ca,cc
 38 | ca,ccca
 39 | ca,cccaccca
 40 | cb,cc
 41 | cb,cccb
 42 | cb,cccbcccb
 43 | cc,cc
 44 | cc,cccc
 45 | cc,cccccccc
 46 | cd,cc
 47 | cd,cccd
 48 | cd,cccdcccd
 49 | da,dd
 50 | da,ddda
 51 | da,dddaddda
 52 | db,dd
 53 | db,dddb
 54 | db,dddbdddb
 55 | dc,dd
 56 | dc,dddc
 57 | dc,dddcdddc
 58 | dd,dd
 59 | dd,dddd
 60 | dd,dddddddd
 61 | aaa,aaa
 62 | aaa,aaaaaa
 63 | aaa,aaaaaaaaaaaa
 64 | aba,aab
 65 | aba,aabaaa
 66 | aba,aabaaabaaaba
 67 | aca,aac
 68 | aca,aacaaa
 69 | aca,aacaaacaaaca
 70 | ada,aad
 71 | ada,aadaaa
 72 | ada,aadaaadaaada
 73 | baa,bba
 74 | baa,bbaabb
 75 | baa,bbaabbaabbaa
 76 | bba,bbb
 77 | bba,bbbabb
 78 | bba,bbbabbbabbba
 79 | bca,bbc
 80 | bca,bbcabb
 81 | bca,bbcabbcabbca
 82 | bda,bbd
 83 | bda,bbdabb
 84 | bda,bbdabbdabbda
 85 | caa,cca
 86 | caa,ccaacc
 87 | caa,ccaaccaaccaa
 88 | cba,ccb
 89 | cba,ccbacc
 90 | cba,ccbaccbaccba
 91 | cca,ccc
 92 | cca,cccacc
 93 | cca,cccacccaccca
 94 | cda,ccd
 95 | cda,ccdacc
 96 | cda,ccdaccdaccda
 97 | daa,dda
 98 | daa,ddaadd
 99 | daa,ddaaddaaddaa
100 | dba,ddb
101 | dba,ddbadd
102 | dba,ddbaddbaddba
103 | dca,ddc
104 | dca,ddcadd
105 | dca,ddcaddcaddca
106 | dda,ddd
107 | dda,dddadd
108 | dda,dddadddaddda
109 | aaaa,aaaa
110 | aaaa,aaaaaaaa
111 | aaaa,aaaaaaaaaaaaaaaa
112 | abaa,abaa
113 | abaa,abaaabaa
114 | abaa,abaaabaaabaaabaa
115 | acaa,acaa
116 | acaa,acaaacaa
117 | acaa,acaaacaaacaaacaa
118 | adaa,adaa
119 | adaa,adaaadaa
120 | adaa,adaaadaaadaaadaa
121 | baaa,baaa
122 | baaa,baaabaaa
123 | baaa,baaabaaabaaabaaa
124 | bbaa,bbaa
125 | bbaa,bbaabbaa
126 | bbaa,bbaabbaabbaabbaa
127 | bcaa,bcaa
128 | bcaa,bcaabcaa
129 | bcaa,bcaabcaabcaabcaa
130 | bdaa,bdaa
131 | bdaa,bdaabdaa
132 | bdaa,bdaabdaabdaabdaa
133 | caaa,caaa
134 | caaa,caaacaaa
135 | caaa,caaacaaacaaacaaa
136 | cbaa,cbaa
137 | cbaa,cbaacbaa
138 | cbaa,cbaacbaacbaacbaa
139 | ccaa,ccaa
140 | ccaa,ccaaccaa
141 | ccaa,ccaaccaaccaaccaa
142 | cdaa,cdaa
143 | cdaa,cdaacdaa
144 | cdaa,cdaacdaacdaacdaa
145 | daaa,daaa
146 | daaa,daaadaaa
147 | daaa,daaadaaadaaadaaa
148 | dbaa,dbaa
149 | dbaa,dbaadbaa
150 | dbaa,dbaadbaadbaadbaa
151 | dcaa,dcaa
152 | dcaa,dcaadcaa
153 | dcaa,dcaadcaadcaadcaa
154 | ddaa,ddaa
155 | ddaa,ddaaddaa
156 | ddaa,ddaaddaaddaaddaa
157 | bbbb,bbbb
158 | bbbb,bbbbbbbb
159 | bbbb,bbbbbbbbbbbbbbbb
160 | cccc,cccc
161 | cccc,cccccccc
162 | cccc,cccccccccccccccc
163 | dddd,dddd
164 | dddd,dddddddd
165 | dddd,dddddddddddddddd
166 | abab,abab
167 | abab,abababab
168 | abab,abababababababab
169 | acac,acac
170 | acac,acacacac
171 | acac,acacacacacacacac
172 | bdbd,bdbd
173 | bdbd,bdbdbdbd
174 | bdbd,bdbdbdbdbdbdbdbd
175 | 


--------------------------------------------------------------------------------
/code/fl/data/cycle_8t_2cw-test.txt:
--------------------------------------------------------------------------------
  1 | a,a
  2 | a,aa
  3 | a,aaaa
  4 | a,aaaaaaaa
  5 | b,b
  6 | b,bb
  7 | b,bbbb
  8 | b,bbbbbbbb
  9 | c,c
 10 | c,cc
 11 | c,cccc
 12 | c,cccccccc
 13 | d,d
 14 | d,dd
 15 | d,dddd
 16 | d,dddddddd
 17 | e,e
 18 | e,ee
 19 | e,eeee
 20 | e,eeeeeeee
 21 | f,f
 22 | f,ff
 23 | f,ffff
 24 | f,ffffffff
 25 | g,g
 26 | g,gg
 27 | g,gggg
 28 | g,gggggggg
 29 | h,h
 30 | h,hh
 31 | h,hhhh
 32 | h,hhhhhhhh
 33 | aa,a
 34 | aa,aa
 35 | aa,aaaa
 36 | aa,aaaaaaaa
 37 | ab,a
 38 | ab,ab
 39 | ab,abab
 40 | ab,abababab
 41 | ac,a
 42 | ac,ac
 43 | ac,acac
 44 | ac,acacacac
 45 | ad,a
 46 | ad,ad
 47 | ad,adad
 48 | ad,adadadad
 49 | ae,a
 50 | ae,ae
 51 | ae,aeae
 52 | ae,aeaeaeae
 53 | af,a
 54 | af,af
 55 | af,afaf
 56 | af,afafafaf
 57 | ag,a
 58 | ag,ag
 59 | ag,agag
 60 | ag,agagagag
 61 | ah,a
 62 | ah,ah
 63 | ah,ahah
 64 | ah,ahahahah
 65 | ba,b
 66 | ba,ba
 67 | ba,baba
 68 | ba,babababa
 69 | bb,b
 70 | bb,bb
 71 | bb,bbbb
 72 | bb,bbbbbbbb
 73 | bc,b
 74 | bc,bc
 75 | bc,bcbc
 76 | bc,bcbcbcbc
 77 | bd,b
 78 | bd,bd
 79 | bd,bdbd
 80 | bd,bdbdbdbd
 81 | be,b
 82 | be,be
 83 | be,bebe
 84 | be,bebebebe
 85 | bf,b
 86 | bf,bf
 87 | bf,bfbf
 88 | bf,bfbfbfbf
 89 | bg,b
 90 | bg,bg
 91 | bg,bgbg
 92 | bg,bgbgbgbg
 93 | bh,b
 94 | bh,bh
 95 | bh,bhbh
 96 | bh,bhbhbhbh
 97 | ca,c
 98 | ca,ca
 99 | ca,caca
100 | ca,cacacaca
101 | cb,c
102 | cb,cb
103 | cb,cbcb
104 | cb,cbcbcbcb
105 | cc,c
106 | cc,cc
107 | cc,cccc
108 | cc,cccccccc
109 | cd,c
110 | cd,cd
111 | cd,cdcd
112 | cd,cdcdcdcd
113 | ce,c
114 | ce,ce
115 | ce,cece
116 | ce,cececece
117 | cf,c
118 | cf,cf
119 | cf,cfcf
120 | cf,cfcfcfcf
121 | cg,c
122 | cg,cg
123 | cg,cgcg
124 | cg,cgcgcgcg
125 | ch,c
126 | ch,ch
127 | ch,chch
128 | ch,chchchch
129 | da,d
130 | da,da
131 | da,dada
132 | da,dadadada
133 | db,d
134 | db,db
135 | db,dbdb
136 | db,dbdbdbdb
137 | dc,d
138 | dc,dc
139 | dc,dcdc
140 | dc,dcdcdcdc
141 | dd,d
142 | dd,dd
143 | dd,dddd
144 | dd,dddddddd
145 | de,d
146 | de,de
147 | de,dede
148 | de,dededede
149 | df,d
150 | df,df
151 | df,dfdf
152 | df,dfdfdfdf
153 | dg,d
154 | dg,dg
155 | dg,dgdg
156 | dg,dgdgdgdg
157 | dh,d
158 | dh,dh
159 | dh,dhdh
160 | dh,dhdhdhdh
161 | ea,e
162 | ea,ea
163 | ea,eaea
164 | ea,eaeaeaea
165 | eb,e
166 | eb,eb
167 | eb,ebeb
168 | eb,ebebebeb
169 | ec,e
170 | ec,ec
171 | ec,ecec
172 | ec,ecececec
173 | ed,e
174 | ed,ed
175 | ed,eded
176 | ed,edededed
177 | ee,e
178 | ee,ee
179 | ee,eeee
180 | ee,eeeeeeee
181 | ef,e
182 | ef,ef
183 | ef,efef
184 | ef,efefefef
185 | eg,e
186 | eg,eg
187 | eg,egeg
188 | eg,egegegeg
189 | eh,e
190 | eh,eh
191 | eh,eheh
192 | eh,eheheheh
193 | fa,f
194 | fa,fa
195 | fa,fafa
196 | fa,fafafafa
197 | fb,f
198 | fb,fb
199 | fb,fbfb
200 | fb,fbfbfbfb
201 | fc,f
202 | fc,fc
203 | fc,fcfc
204 | fc,fcfcfcfc
205 | fd,f
206 | fd,fd
207 | fd,fdfd
208 | fd,fdfdfdfd
209 | fe,f
210 | fe,fe
211 | fe,fefe
212 | fe,fefefefe
213 | ff,f
214 | ff,ff
215 | ff,ffff
216 | ff,ffffffff
217 | fg,f
218 | fg,fg
219 | fg,fgfg
220 | fg,fgfgfgfg
221 | fh,f
222 | fh,fh
223 | fh,fhfh
224 | fh,fhfhfhfh
225 | ga,g
226 | ga,ga
227 | ga,gaga
228 | ga,gagagaga
229 | gb,g
230 | gb,gb
231 | gb,gbgb
232 | gb,gbgbgbgb
233 | gc,g
234 | gc,gc
235 | gc,gcgc
236 | gc,gcgcgcgc
237 | gd,g
238 | gd,gd
239 | gd,gdgd
240 | gd,gdgdgdgd
241 | ge,g
242 | ge,ge
243 | ge,gege
244 | ge,gegegege
245 | gf,g
246 | gf,gf
247 | gf,gfgf
248 | gf,gfgfgfgf
249 | gg,g
250 | gg,gg
251 | gg,gggg
252 | gg,gggggggg
253 | gh,g
254 | gh,gh
255 | gh,ghgh
256 | gh,ghghghgh
257 | ha,h
258 | ha,ha
259 | ha,haha
260 | ha,hahahaha
261 | hb,h
262 | hb,hb
263 | hb,hbhb
264 | hb,hbhbhbhb
265 | hc,h
266 | hc,hc
267 | hc,hchc
268 | hc,hchchchc
269 | hd,h
270 | hd,hd
271 | hd,hdhd
272 | hd,hdhdhdhd
273 | he,h
274 | he,he
275 | he,hehe
276 | he,hehehehe
277 | hf,h
278 | hf,hf
279 | hf,hfhf
280 | hf,hfhfhfhf
281 | hg,h
282 | hg,hg
283 | hg,hghg
284 | hg,hghghghg
285 | hh,h
286 | hh,hh
287 | hh,hhhh
288 | hh,hhhhhhhh
289 | 


--------------------------------------------------------------------------------
/code/fl/data/dataset.md:
--------------------------------------------------------------------------------
 1 | # Training Data Directory
 2 | 
 3 | This directory stores training and validation data files used by the model. These files are not tracked in git, but will be created when you prepare your training data.
 4 | 
 5 | Sample files:
 6 | ```
 7 | sample-train.txt  # Training data based on simple capitalization of 8 characters: abcdefgh -> ABCDEFGH
 8 | sample-val.txt    # Validations
 9 | ```
10 | 
11 | User expected files:
12 | ```
13 | train.txt  # Training data file (Base64 encoded input-output pairs)
14 | val.txt    # Validation data file (Base64 encoded input-output pairs)
15 | ```
16 | 
17 | ## File Format
18 | Each line in these files should be a Base64-encoded string representing an input-output pair separated by a tab character. For example:
19 | ```
20 | SGVsbG8=\tV29ybGQ= # Hello	World
21 | ```
22 | 


--------------------------------------------------------------------------------
/code/fl/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov
3 | black
4 | mypy
5 | ruff
6 | 


--------------------------------------------------------------------------------
/code/fl/docs/code_architecture.md:
--------------------------------------------------------------------------------
  1 | # Fluctlight Code Architecture
  2 | 
  3 | ```mermaid
  4 | graph TD
  5 |     subgraph CLI
  6 |         cli[cli.py] --> train[Train Command]
  7 |         cli --> generate[Generate Command]
  8 |     end
  9 | 
 10 |     subgraph Core
 11 |         model[model.py<br/>FluctlightTransformer] --> dataset[dataset.py<br/>Base64Dataset]
 12 |         model --> utils[utils.py<br/>Base64 Utils]
 13 |     end
 14 | 
 15 |     subgraph Testing
 16 |         test_model[test_model.py] --> model
 17 |         test_dataset[test_dataset.py] --> dataset
 18 |         test_utils[test_utils.py] --> utils
 19 |         test_device[test_device.py] --> model
 20 |     end
 21 | 
 22 |     train --> model
 23 |     generate --> model
 24 |     dataset --> utils
 25 | ```
 26 | 
 27 | ## Overview
 28 | 
 29 | The Fluctlight project implements a byte-level transformer model with Rotary Position Embeddings (RoPE). The architecture focuses on efficiency and clarity while maintaining core transformer functionality.
 30 | 
 31 | ## Core Components
 32 | 
 33 | ### FluctlightTransformer
 34 | 
 35 | The main model implementation with the following architecture:
 36 | 
 37 | - Vocabulary: 256 tokens (byte-level encoding)
 38 | - Embedding Dimension: 4 (compact but effective)
 39 | - Attention Heads: 2 (each head dimension: 2)
 40 | - Feed-forward Dimension: 8 (2x embedding dimension)
 41 | - Context Window: 16 tokens
 42 | - Position Encoding: Rotary Positional Embedding (RoPE)
 43 | 
 44 | Key features:
 45 | - Byte-level tokenization eliminates need for complex tokenizer
 46 | - RoPE for enhanced position-aware attention
 47 | - Dynamic dropout based on model size
 48 | - Efficient context window management
 49 | 
 50 | ### Dataset Handling
 51 | 
 52 | The `Base64Dataset` class provides:
 53 | - Loading of base64-encoded input-output pairs
 54 | - Optional prepending of training data
 55 | - Automatic device placement
 56 | - Efficient sequence collation and padding
 57 | 
 58 | Data format:
 59 | ```
 60 | base64(input)\tbase64(output)\n
 61 | ```
 62 | 
 63 | ### Training Infrastructure
 64 | 
 65 | Components for efficient training:
 66 | - Automatic device detection (CUDA, MPS, CPU)
 67 | - Configurable CPU worker allocation
 68 | - Batch collation with padding
 69 | - Context window enforcement
 70 | 
 71 | ## Implementation Details
 72 | 
 73 | ### Attention Mechanism
 74 | 
 75 | The attention implementation uses:
 76 | 1. RoPE for positional information
 77 | 2. Causal masking for autoregressive prediction
 78 | 3. Multi-head attention with efficient head dimension splitting
 79 | 
 80 | ### Training Process
 81 | 
 82 | The training loop:
 83 | 1. Loads base64-encoded pairs
 84 | 2. Applies context window limits
 85 | 3. Shifts sequences for next-token prediction
 86 | 4. Computes loss with proper padding handling
 87 | 
 88 | ### Utility Functions
 89 | 
 90 | Core utilities:
 91 | - Base64 decoding for training data
 92 | - Device detection and management
 93 | - DataLoader creation with optimal settings
 94 | - Sequence collation and padding
 95 | 
 96 | ## Testing
 97 | 
 98 | The test suite covers:
 99 | 1. Model architecture and forward pass
100 | 2. Dataset loading and processing
101 | 3. Device handling and tensor placement
102 | 4. Training functionality
103 | 5. Utility functions
104 | 
105 | ## File Structure
106 | 
107 | ```
108 | fluctlight/
109 | ├── model.py      # FluctlightTransformer implementation
110 | ├── dataset.py    # Data loading and processing
111 | ├── utils.py      # Utility functions
112 | └── cli.py        # Command-line interface
113 | 
114 | tests/
115 | ├── test_model.py    # Model tests
116 | ├── test_dataset.py  # Dataset tests
117 | ├── test_device.py   # Device handling tests
118 | └── test_utils.py    # Utility function tests
119 | ```
120 | 


--------------------------------------------------------------------------------
/code/fl/docs/results/capitalize_a_2cw.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/capitalize_a_2cw.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/capitalize_a_2cw.md:
--------------------------------------------------------------------------------
 1 | # Fluctlight Transformer Capitalization Results (CW=2)
 2 | 
 3 | ## Training Information
 4 | 
 5 | The model successfully learned the capitalization task with perfect accuracy:
 6 | - Training loss: 0.000117
 7 | - Validation accuracy: 100%
 8 | - Validation loss: 0.00012
 9 | 
10 | ### Model Configuration
11 | - Vocabulary size: 256
12 | - Embedding dimension (d_model): 4
13 | - Attention heads: 2
14 | - Number of layers: 2
15 | - Feed-forward dimension: 8
16 | - Context window: 2
17 | 
18 | ## Test Results
19 | 
20 | The model demonstrates perfect capitalization behavior for both single and multi-character inputs:
21 | 
22 | | Input | Max Length | Generated | Analysis |
23 | |-------|------------|-----------|----------|
24 | | `a`   | 1          | `A`       | Basic lowercase → uppercase conversion |
25 | | `aa`  | 1          | `A`       | Predicts first token after input |
26 | | `aa`  | 2          | `AA`      | Correctly capitalizes both characters |
27 | | `aA`  | 1          | `A`       | Predicts uppercase for next token |
28 | | `Aa`  | 1          | `A`       | Maintains uppercase for next token |
29 | | `Aa`  | 2          | `AA`      | Correctly capitalizes both characters |
30 | | `A`   | 1          | `A`       | Maintains uppercase for single character |
31 | | `AA`  | 1          | `A`       | Predicts uppercase for next token |
32 | | `AA`  | 2          | `AA`      | Correctly maintains uppercase |
33 | | `kk`  | 2          | `¿`       | Out-of-distribution input produces unexpected result |
34 | 
35 | ## Analysis
36 | 
37 | The model successfully learned the capitalization pattern for its training distribution (a→A) with remarkable accuracy. The context window of 2 is sufficient for this task, as demonstrated by perfect validation metrics.
38 | 
39 | Key observations:
40 | 1. The tiny model (d_model=4) is capable of learning this simple transformation task perfectly
41 | 2. For max_length=1, it generates exactly one token (the uppercase version of the last input token)
42 | 3. For max_length=2, it generates two tokens (uppercase versions of both input tokens)
43 | 4. The model's behavior becomes unpredictable for characters outside its training distribution (`kk` → `¿`)
44 | 
45 | This experiment demonstrates that even extremely tiny transformers can learn simple token-level transformations when working with an appropriate context window size and properly aligned training data.
46 | 


--------------------------------------------------------------------------------
/code/fl/docs/results/capitalize_a_4cw.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/capitalize_a_4cw.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/capitalize_a_4cw.md:
--------------------------------------------------------------------------------
 1 | # Fluctlight Transformer 4-Token Context Window Results
 2 | 
 3 | ## Training Metrics
 4 | - Training loss: 5.5e-6 (extremely low)
 5 | - Validation accuracy: 100% (perfect)
 6 | - Validation loss: 1.14e-5 (extremely low)
 7 | 
 8 | ## Model Configuration
 9 | - Vocabulary size: 256
10 | - Embedding dimension (d_model): 4
11 | - Attention heads: 2
12 | - Number of layers: 2
13 | - Feed-forward dimension: 8
14 | - Context window: 4
15 | 
16 | ## Test Results
17 | 
18 | | Input | Max Length | Generated | Analysis |
19 | |-------|------------|-----------|----------|
20 | | `a`   | 1          | `A`       | Basic capitalization works |
21 | | `aaAa`| 1          | `A`       | Single token prediction is 'A' |
22 | | `aaAa`| 4          | `AAAA`    | Generates four 'A's |
23 | | `ajAa`| 4          | `AAAA`    | Generates four 'A's despite out-of-distribution 'j' |
24 | | `ajJa`| 4          | `AAAA`    | Generates four 'A's despite out-of-distribution 'j' and 'J' |
25 | | `k`   | 1          | `A`       | Generates 'A' for an out-of-distribution character |
26 | | `k`   | 4          | `AAAA`    | Generates four 'A's for an out-of-distribution character |
27 | | `ordo`| 1          | `A`       | Generates 'A' for completely out-of-distribution input |
28 | 
29 | ## Analysis
30 | 
31 | The model has overfitted to a simple pattern: output "A" for every position, regardless of input. This is evident from:
32 | 
33 | 1. Perfect training and validation metrics (near-zero loss)
34 | 2. Consistent "A" output regardless of input characters
35 | 3. No differentiation between in-distribution ('a', 'A') and out-of-distribution (all other characters)
36 | 
37 | The model has essentially learned the most frequent output token in the training data ("A") and decided to produce it for all inputs. This is a classic case of the model finding the simplest solution that minimizes loss on the training data.
38 | 
39 | While this behavior perfectly satisfies the capitalization task for 'a', it doesn't demonstrate true understanding of the capitalization concept. The model has memorized that the correct output is always "A" rather than learning the lowercase→uppercase transformation rule.
40 | 
41 | This suggests that:
42 | 1. The training data was too narrowly focused on a single character
43 | 2. The model found a shortcut solution rather than learning a generalizable rule
44 | 3. For more complex learning, the training data should include more character varieties
45 | 
46 | For future experiments, expanding the character set beyond just 'a/A' would force the model to learn the true capitalization relationship rather than memorizing a specific output pattern.
47 | 


--------------------------------------------------------------------------------
/code/fl/docs/results/capitalize_a_withall_4cw.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/capitalize_a_withall_4cw.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/capitalize_a_withall_4cw.md:
--------------------------------------------------------------------------------
 1 | # Fluctlight Transformer All-Bytes Test Results (CW=4)
 2 | 
 3 | ## Training Metrics
 4 | - Training loss: 0.00105
 5 | - Validation accuracy: 97.0%
 6 | - Validation loss: 0.373
 7 | 
 8 | ## Model Configuration
 9 | - Vocabulary size: 256 (full byte range)
10 | - Embedding dimension (d_model): 4
11 | - Attention heads: 2
12 | - Number of layers: 2
13 | - Feed-forward dimension: 8
14 | - Context window: 4
15 | 
16 | ## Test Results
17 | 
18 | | Input | Max Length | Generated | Analysis |
19 | |-------|------------|-----------|----------|
20 | | `ordo`| 1          | `o`       | Echoes last character |
21 | | `ordo`| 4          | `oooo`    | Repeats last character 4 times |
22 | | `orAo`| 4          | `oooo`    | Repeats last character 4 times |
23 | | `aA`  | 4          | `AAAA`    | Capitalizes 'a' and repeats 'A' |
24 | | `aAor`| 4          | `rrrr`    | Repeats last character 4 times |
25 | | `aAoA`| 4          | `AAAA`    | Echoes 'A' from last position |
26 | | `aAop`| 1          | `p`       | Echoes last character |
27 | | `@`   | 1          | `@`       | Echoes input character |
28 | 
29 | ## Analysis
30 | 
31 | The model has learned a fascinating behavior:
32 | 
33 | 1. **Last-character dominance**: The model primarily outputs whatever character appears in the last position of the input.
34 | 
35 | 2. **Special case for 'a'**: The model correctly transforms 'a' to 'A' when 'a' is the last character.
36 | 
37 | 3. **Identity for other characters**: For characters other than 'a', the model preserves their identity.
38 | 
39 | 4. **Repetition pattern**: With longer max_length settings, the model repeats the last character.
40 | 
41 | This suggests that despite the tiny architecture (d_model=4), the model has successfully learned:
42 | - The special transformation rule ('a' → 'A')
43 | - To preserve most characters unchanged
44 | - To focus heavily on the last position in the context window
45 | 
46 | The model has effectively learned a simplified version of the intended pattern: it capitalizes 'a' correctly but doesn't process the entire sequence - it's primarily leveraging the last token to make predictions.
47 | 
48 | This result demonstrates that even with a minimal transformer architecture, the model can learn position-dependent transformation rules, although its attention capabilities are limited by its tiny embedding dimension.
49 | 


--------------------------------------------------------------------------------
/code/fl/docs/results/capitalize_a_withall_4cw_v.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/capitalize_a_withall_4cw_v.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/capitalize_a_withall_4cw_v.md:
--------------------------------------------------------------------------------
 1 | # Fluctlight Transformer V-Rotation Results (CW=4)
 2 | 
 3 | ## Training Metrics
 4 | - Training loss: 0.00416
 5 | - Validation accuracy: 97.6%
 6 | - Validation loss: 0.175
 7 | 
 8 | ## Test Results
 9 | 
10 | | Input | Max Length | Generated | Analysis |
11 | |-------|------------|-----------|----------|
12 | | `orAo`| 4          | `oooo`    | Repeats last character |
13 | | `orAa`| 4          | `AAAA`    | Capitalizes 'a' and repeats |
14 | | `o5Aa`| 4          | `AAAA`    | Capitalizes 'a' and repeats |
15 | | `o5A5`| 4          | `5555`    | Repeats last character |
16 | | `a`   | 4          | `AAAA`    | Capitalizes 'a' and repeats |
17 | | `AA`  | 4          | `AAAA`    | Preserves 'A' and repeats |
18 | | `0`   | 4          | `0000`    | Preserves '0' and repeats |
19 | 
20 | ## Analysis
21 | 
22 | With V-rotation enabled, the model still exhibits very similar behavior to before:
23 | 
24 | 1. **Last-character fixation**: The model continues to focus primarily on the last character of the input sequence.
25 | 
26 | 2. **Special 'a' → 'A' transformation**: The model correctly applies the capitalization rule when 'a' is the last character.
27 | 
28 | 3. **Consistent repetition**: The model consistently repeats the predicted character for the specified max length.
29 | 
30 | The V-rotation appears to have made minimal difference in the model's behavior, with no obvious change in how it processes position information. This suggests:
31 | 
32 | 1. The 'a' → 'A' transformation rule is simple enough that additional positional encoding in the value vectors isn't necessary to learn it.
33 | 
34 | 2. The model's tiny size (d_model=4) may be limiting how much it can leverage the additional positional information.
35 | 
36 | 3. The model's strong bias toward the last position is a simple but effective strategy that works well for this task, even with different RoPE configurations.
37 | 
38 | An interesting observation is that turning on V-rotation slightly increased the training loss (from 0.00105 to 0.00416) but slightly improved validation accuracy (from 97.0% to 97.6%). This suggests that V-rotation might be providing some regularization benefit, making the model slightly more generalizable despite being slightly harder to train.
39 | 


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_16t_2cw.md:
--------------------------------------------------------------------------------
 1 | # Fluctlight Transformer Cyclic 16-Token Transformation (CW=2)
 2 | 
 3 | ## Experimental Setup
 4 | - Model: Fluctlight Transformer (2.7K parameters)
 5 | - Context Window: 2 tokens
 6 | - Training Method: Includes vocabulary seeding
 7 | - Test Cases: 1088 total patterns
 8 |   - 64 single token patterns (16 tokens × 4 lengths)
 9 |   - 1024 two token patterns (16² combinations × 4 lengths)
10 | 
11 | ## Training Configurations Tested
12 | 
13 | | Configuration | Weight Decay | V-Scale | Train Loss | Val Loss | Outcome |
14 | |--------------|--------------|----------|------------|-----------|---------|
15 | | Default | 1e-5 | 1.0 | 2.170 | 1.520 | Partial Success |
16 | | Reduced WD | 1e-6 | 1.0 | 2.180 | 1.450 | PASSES |
17 | | No RoPE | 1e-6 | 0.0 | 2.520 | 1.760 | FAILS (210 errors) |
18 | 
19 | ## Analysis of Results
20 | 
21 | ### 1. Weight Decay Impact (v-scale=1.0)
22 | - **1e-5 (Default)**:
23 |   - Partial convergence
24 |   - Better training loss (2.170)
25 |   - Higher validation loss (1.520)
26 |   - Shows systematic failures in specific tokens
27 | 
28 | - **1e-6 (Reduced)**:
29 |   - Full convergence
30 |   - Slightly worse training loss (2.180)
31 |   - Better validation loss (1.450)
32 |   - Achieves perfect pattern replication
33 | 
34 | ### 2. V-Scale Impact (weight-decay=1e-6)
35 | - **v-scale=1.0**:
36 |   - Successful convergence
37 |   - Lower losses overall
38 |   - Perfect pattern replication
39 |   - Stable training dynamics
40 | 
41 | - **v-scale=0.0**:
42 |   - Failed to converge
43 |   - Significantly higher losses
44 |   - 210/1088 test errors (19.3% error rate)
45 |   - Shows systematic pattern breakdown
46 | 
47 | ### 3. Pattern Analysis (Default Configuration)
48 | Single Token Performance:
49 | ✅ Perfect: a,b,e,f,h,l,n (7/16)
50 | ❌ Failed: c,d,g,i,j,k,m,o,p (9/16)
51 | 
52 | 
53 | Error Characteristics:
54 | - Token substitution (e.g., o→b, p→c)
55 | - Pattern maintenance despite wrong token
56 | - Higher RMSE for distant token substitutions
57 | - Consistent error patterns across lengths
58 | 
59 | ## Key Findings
60 | 
61 | 1. **Parameter Sensitivity**
62 |    - Weight decay critical for 16-token learning
63 |    - 1e-6 provides better generalization than 1e-5
64 |    - RoPE scaling essential for convergence
65 | 
66 | 2. **Training Dynamics**
67 |    - Vocabulary seeding improves training speed
68 |    - Pattern learning hierarchical (some tokens learn first)
69 |    - Error patterns show structured misconvergence
70 | 
71 | 3. **Scaling Characteristics**
72 |    - 16-token space significantly more challenging than 8
73 |    - RoPE scaling becomes more critical with token count
74 |    - Weight decay needs adjustment for larger token spaces
75 | 
76 | ## Conclusions
77 | 
78 | 1. The optimal configuration for 16-token learning is:
79 |    - Weight decay: 1e-6
80 |    - V-scale: 1.0
81 |    - Vocabulary seeding: Enabled
82 | 
83 | 2. The model demonstrates:
84 |    - Sensitivity to hyperparameters increases with token count
85 |    - Clear phase transition with proper parameter settings
86 |    - Structured failure modes when suboptimal
87 | 
88 | 3. Future considerations:
89 |    - Investigate intermediate v-scale values
90 |    - Explore adaptive weight decay schedules
91 |    - Consider token embedding distance metrics
92 | 
93 | This experiment reveals the delicate balance between regularization and representation capacity needed for larger token spaces, with RoPE scaling playing a crucial role in successful convergence.
94 | 


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_16t_2cw.stable.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/cycle_16t_2cw.stable.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_16t_2cw.unstable.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/cycle_16t_2cw.unstable.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_2t_2cw.md:
--------------------------------------------------------------------------------
 1 | # Fluctlight Transformer Cyclic 2-Token Transformation (CW=2)
 2 | ## Training Information
 3 | The model successfully learned the cyclic transformation task in a single epoch using v_scale=0.0:
 4 | - Training loss: 1.450
 5 | - Validation loss: 0.639
 6 | - Training files: `data/cycle_2t_2cw-train.txt`
 7 | - Validation files: `data/cycle_2t_2cw-val.txt`
 8 | - Test file: `data/cycle_2t_2cw-test.txt`
 9 | 
10 | ### Model Configuration
11 | | Parameter | Value |
12 | |-----------|-------|
13 | | Vocabulary size | 256 |
14 | | Embedding dimension (d_model) | 4 |
15 | | Attention heads | 2 |
16 | | Number of layers | 2 |
17 | | Feed-forward dimension | 8 |
18 | | Context window | 2 |
19 | | Dropout rate | 0.0078125 |
20 | | Total parameters | 2.7K |
21 | 
22 | ## Test Results for v_scale = 0.0
23 | 
24 | ### Epoch 0 Results
25 | 
26 | | Match | Errors | RMSE | Input | Expected | Actual |
27 | |-------|--------|------|-------|-----------|---------|
28 | | ✅ | 0 | 0.000 | a | a | a |
29 | | ✅ | 0 | 0.000 | a | aa | aa |
30 | | ✅ | 0 | 0.000 | a | aaaa | aaaa |
31 | | ✅ | 0 | 0.000 | a | aaaaaaaa | aaaaaaaa |
32 | | ✅ | 0 | 0.000 | b | b | b |
33 | | ✅ | 0 | 0.000 | b | bb | bb |
34 | | ✅ | 0 | 0.000 | b | bbbb | bbbb |
35 | | ✅ | 0 | 0.000 | b | bbbbbbbb | bbbbbbbb |
36 | | ✅ | 0 | 0.000 | ab | a | a |
37 | | ✅ | 0 | 0.000 | ab | ab | ab |
38 | | ✅ | 0 | 0.000 | ab | abab | abab |
39 | | ✅ | 0 | 0.000 | ab | abababab | abababab |
40 | | ✅ | 0 | 0.000 | ba | b | b |
41 | | ✅ | 0 | 0.000 | ba | ba | ba |
42 | | ✅ | 0 | 0.000 | ba | baba | baba |
43 | | ✅ | 0 | 0.000 | ba | babababa | babababa |
44 | 
45 | ## Analysis
46 | The model demonstrates excellent learning characteristics:
47 | 1. Perfect accuracy achieved in just one epoch
48 | 2. Zero errors across all test cases
49 | 3. Successfully learned both single-token and two-token patterns
50 | 
51 | Key Observations:
52 | - Model achieves perfect pattern replication with v_scale = 0.0
53 | - Model required one additional epoch with v_scale = 1.0
54 | - Handles both single character repetition (a→aaaa) and alternating patterns (ab→abab)
55 | - Very efficient learning with minimal architecture (only 2.7K parameters)
56 | - Fast convergence with training loss of 1.450 dropping to validation loss of 0.639
57 | 
58 | Implications:
59 | - Minimal transformer architecture (4-dimensional embeddings, 2 heads) is sufficient for simple pattern learning
60 | - 2-token context window successfully captures both single-token and alternating-token patterns
61 | - Low dropout rate (0.0078125) allows for stable pattern learning
62 | - The model demonstrates that RoPE v-scaling is not necessary for this simple pattern recognition task
63 | 
64 | This experiment demonstrates that even a tiny transformer can perfectly learn and reproduce simple cyclic patterns with minimal computational resources and training time.


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_2t_2cw.stable.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/cycle_2t_2cw.stable.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_2t_4cw.md:
--------------------------------------------------------------------------------
  1 | # 2-Token Pattern Learning with 4-Token Context Window
  2 | 
  3 | This document describes training a Fluctlight model to learn cyclic patterns using 2 active tokens (a,b) with a 4-token context window.
  4 | 
  5 | ## Training Data Format
  6 | 
  7 | The training data consists of Base64-encoded input/output pairs, where:
  8 | - Inputs can be 1-4 tokens long
  9 | - Outputs are always 4 tokens long
 10 | - Shorter inputs are padded with zeros on the left
 11 | - The model must learn to handle padding appropriately
 12 | 
 13 | See `training/cycle_2t_4w.py` for the data generation code.
 14 | 
 15 | ## Pattern Examples
 16 | 
 17 | ### Single Token (Length 1)
 18 | Input tokens are padded with 3 zeros on the left:
 19 | ```
 20 | 'a' -> 'aaaa'  ([0,0,0,a] → repeat 'a')
 21 | 'b' -> 'bbbb'  ([0,0,0,b] → repeat 'b')
 22 | ```
 23 | 
 24 | ### Double Token (Length 2)
 25 | Input tokens are padded with 2 zeros on the left:
 26 | ```
 27 | 'aa' -> 'aaaa'  ([0,0,a,a] → repeat 'a')
 28 | 'ab' -> 'aaab'  ([0,0,a,b] → cycle to 'a' then 'b')
 29 | 'ba' -> 'bbba'  ([0,0,b,a] → cycle to 'b' then 'a')
 30 | 'bb' -> 'bbbb'  ([0,0,b,b] → repeat 'b')
 31 | ```
 32 | 
 33 | ### Triple Token (Length 3)
 34 | Input tokens are padded with 1 zero on the left:
 35 | ```
 36 | 'aaa' -> 'aaaa'  ([0,a,a,a] → repeat 'a')
 37 | 'aab' -> 'aaab'  ([0,a,a,b] → cycle to 'a' then 'b')
 38 | 'aba' -> 'aaba'  ([0,a,b,a] → pattern 'aaba')
 39 | 'baa' -> 'bbaa'  ([0,b,a,a] → pattern 'bbaa')
 40 | 'bba' -> 'bbba'  ([0,b,b,a] → cycle to 'b' then 'a')
 41 | ```
 42 | 
 43 | ### Full Length (Length 4)
 44 | No padding needed, patterns maintain themselves:
 45 | ```
 46 | 'aaaa' -> 'aaaa'  ([a,a,a,a] → maintain pattern)
 47 | 'abab' -> 'abab'  ([a,b,a,b] → maintain alternation)
 48 | 'abba' -> 'abba'  ([a,b,b,a] → maintain palindrome)
 49 | ```
 50 | 
 51 | ## Training Results
 52 | 
 53 | The model achieves perfect accuracy with both v_scale settings:
 54 | 
 55 | ### With RoPE on Value Vectors (v_scale=1.0)
 56 | ```
 57 | Epoch 5: val_loss=0.507
 58 | Test Results:
 59 | - Passed: 120/120 (100.0%)
 60 | - Failed: 0 (0.0%)
 61 | - Total Errors: 0
 62 | - Average RMSE: 0.000
 63 | ```
 64 | 
 65 | ### Without RoPE on Value Vectors (v_scale=0.0)
 66 | ```
 67 | Similar perfect results achieved by epoch 5
 68 | ```
 69 | 
 70 | ## Key Insights
 71 | 
 72 | 1. **Padding Handling**: The model must learn to handle left-padded zeros correctly while maintaining pattern recognition.
 73 | 
 74 | 2. **Pattern Complexity**: The 4-token context window allows for more complex patterns than the 2-token window:
 75 |    - Single tokens expand to full 4-token repetitions
 76 |    - Double tokens establish initial patterns that complete to length 4
 77 |    - Triple tokens demonstrate transition handling
 78 |    - Full-length sequences maintain their patterns
 79 | 
 80 | 3. **Position Awareness**: The model successfully learns position-dependent transformations despite varying input lengths and padding.
 81 | 
 82 | 4. **RoPE Effectiveness**: Both v_scale settings achieve perfect accuracy, suggesting the model can learn these patterns with or without RoPE on value vectors.
 83 | 
 84 | ## Generation Process
 85 | 
 86 | The generation process must carefully handle padding tokens when input length < context_window:
 87 | 
 88 | 1. Right-align input with left padding: `[0,0,0,a]` for single token
 89 | 2. Generate next token based on visible context
 90 | 3. Shift window and continue generating
 91 | 4. Pattern completion depends on both input tokens and their positions
 92 | 
 93 | This demonstrates the model's ability to:
 94 | - Handle variable-length inputs
 95 | - Manage padding tokens correctly
 96 | - Maintain consistent pattern generation
 97 | - Complete sequences appropriately based on context
 98 | ```
 99 | 
100 | This document captures the key aspects of the 2-token 4-context-window case while following the structure of the original 2-token 2-context-window documentation. Would you like me to explain or modify any part of it?
101 | 


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_2t_4cw.stable.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/cycle_2t_4cw.stable.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_32t_2cw.md:
--------------------------------------------------------------------------------
  1 | # Fluctlight Transformer Cyclic 32-Token Transformation (CW=2)
  2 | 
  3 | ## Overview
  4 | 
  5 | This experiment tests the Fluctlight model's ability to learn cyclic patterns with a 32-token vocabulary using a 2-token context window. The tokens used were the full lowercase alphabet (a-z) plus six uppercase letters (A-F).
  6 | 
  7 | ## Training Configuration
  8 | 
  9 | - Vocabulary: 32 tokens ("abcdefghijklmnopqrstuvwxyzABCDEF")
 10 | - Context Window: 2 tokens
 11 | - Training Duration: 200 epochs
 12 | - Batch Size: 64
 13 | - Learning Rate: 1e-4
 14 | - Weight Decay: 1e-6
 15 | - V-Scale: 1.0
 16 | 
 17 | ## Results
 18 | 
 19 | ### Final Performance
 20 | 
 21 | After 200 epochs:
 22 | - Training Loss: 3.010
 23 | - Validation Loss: 2.180
 24 | - Training Speed: 60.60 iterations/second
 25 | 
 26 | ### Test Results
 27 | - Total Test Cases: 4,224
 28 | - Passed: 4,128 (97.7%)
 29 | - Failed: 96 (2.3%)
 30 | - Total Errors: 192
 31 | - Average RMSE: 0.375
 32 | 
 33 | ### Error Analysis
 34 | 
 35 | The model showed systematic failure patterns across 24 characters. Each failing character showed consistent error patterns across all sequence lengths (1, 2, 4, and 8 tokens). Key observations:
 36 | 
 37 | 1. **Case Confusion**
 38 |    - c → C (RMSE: 22.627)
 39 |    - t → T (RMSE: 22.627)
 40 |    - D → d (RMSE: 22.627)
 41 | 
 42 | 2. **High RMSE Substitutions**
 43 |    - v → A (RMSE: 37.477)
 44 |    - w → K (RMSE: 31.113)
 45 |    - z → U (RMSE: 26.163)
 46 |    - A → h (RMSE: 27.577)
 47 |    - C → h (RMSE: 26.163)
 48 | 
 49 | 3. **Low RMSE Substitutions**
 50 |    - r → s (RMSE: 0.707, lowest error)
 51 |    - j → m (RMSE: 2.121)
 52 |    - E → H (RMSE: 2.121)
 53 | 
 54 | ### Pattern Analysis
 55 | 
 56 | 1. Most errors maintain consistency across sequence lengths
 57 | 2. The model shows a tendency to:
 58 |    - Confuse case for similar letters
 59 |    - Substitute with visually similar characters
 60 |    - Make systematic replacements (same wrong token consistently)
 61 | 
 62 | ## Training Convergence Benchmarks
 63 | 
 64 | Alternative training configurations were tested:
 65 | 
 66 | 1. **200 Epochs**
 67 |    ```
 68 |    Learning Rate: 1e-4
 69 |    Weight Decay: 1e-6
 70 |    Batch Size: 64
 71 |    Final Train Loss: 3.090
 72 |    Final Val Loss: 2.430
 73 |    ```
 74 | 
 75 | 2. **64 Epochs**
 76 |    ```
 77 |    Learning Rate: 1e-4
 78 |    Weight Decay: 1e-6
 79 |    Batch Size: 64
 80 |    Final Train Loss: 3.800
 81 |    Final Val Loss: 3.330
 82 |    ```
 83 | 
 84 | ## Conclusions
 85 | 
 86 | 1. The model demonstrates surprisingly good performance (97.7% pass rate) despite the increased vocabulary size.
 87 | 2. Errors show systematic patterns, suggesting the model has learned stable but incorrect associations for certain tokens.
 88 | 3. The failure modes are consistent across sequence lengths, indicating stable (though incorrect) pattern learning.
 89 | 4. Training beyond 64 epochs shows significant improvement in both training and validation loss.
 90 | 
 91 | ## Future Work
 92 | 
 93 | 1. Investigate methods to address case confusion errors
 94 | 2. Experiment with different v-scale values to improve token differentiation
 95 | 3. Test alternative learning rate schedules to escape local minima
 96 | 4. Consider increasing model capacity for better token representation
 97 | 
 98 | // ... existing content ...
 99 | 
100 | ## Projections and Future Experiments
101 | 
102 | ### Loss-to-Error Correlation
103 | 
104 | Analysis shows a strong correlation between validation loss improvements and error reduction:
105 | - A 0.01 decrease in validation loss (2.18 → 2.17) corresponds to:
106 |   - ~15 fewer total errors
107 |   - ~8-16 more passing test cases
108 |   - Improved RMSE (0.375 → 0.282)
109 | 
110 | ### Best Performance Snapshot (Epoch 138)
111 | - Validation Loss: 2.17
112 | - Total Tests: 4,224
113 | - Passed: 4,143 (98.1%)
114 | - Failed: 81 (1.9%)
115 | - Total Errors: 164
116 | - Average RMSE: 0.282
117 | 
118 | ### Recommended Exploration Settings
119 | 
120 | 1. **Extended Training Duration**
121 |    ```
122 |    Max Epochs: 500
123 |    Learning Rate: 1e-4
124 |    Weight Decay: 1e-6
125 |    Batch Size: 64
126 |    V-Scale: 1.0
127 |    ```
128 |    Rationale: Given the consistent improvement pattern, extending training may resolve additional error cases.
129 | 
130 | 2. **Learning Rate Schedule**
131 |    ```
132 |    Initial LR: 1e-4
133 |    Schedule: Cosine decay with warm restarts
134 |    Cycle Length: 50 epochs
135 |    Min LR: 1e-5
136 |    ```
137 |    Rationale: Help escape local minima while maintaining stable learning.
138 | 
139 | 3. **Graduated Weight Decay**
140 |    ```
141 |    Initial: 1e-6
142 |    Mid-training (epoch 200): 5e-7
143 |    Final (epoch 400): 2e-7
144 |    ```
145 |    Rationale: Allow finer parameter adjustments as training progresses.
146 | 
147 | 4. **V-Scale Exploration**
148 |    ```
149 |    Test Range: [0.8, 0.9, 1.0, 1.1, 1.2]
150 |    Hold Other Parameters:
151 |      - Learning Rate: 1e-4
152 |      - Weight Decay: 1e-6
153 |      - Batch Size: 64
154 |    ```
155 |    Rationale: Find optimal position encoding scaling for 32-token space.
156 | 
157 | 5. **Hybrid Approach**
158 |    ```
159 |    Phase 1 (epochs 0-200):
160 |      - Learning Rate: 1e-4
161 |      - Weight Decay: 1e-6
162 |      - V-Scale: 1.0
163 |    
164 |    Phase 2 (epochs 201-500):
165 |      - Learning Rate: 5e-5
166 |      - Weight Decay: 5e-7
167 |      - V-Scale: Best from exploration
168 |    ```
169 |    Rationale: Combine initial rapid learning with refined parameter tuning.
170 | 
171 | ### Expected Outcomes
172 | 
173 | Based on the observed loss-to-error correlation:
174 | - Target validation loss: 2.15 or better
175 | - Expected pass rate: >98.5%
176 | - Projected error reduction: 30-50 fewer errors
177 | - Target RMSE: <0.25
178 | 
179 | The primary focus should be on eliminating systematic errors (case confusion, consistent substitutions) while maintaining the model's current strong performance on correctly learned patterns.


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_32t_2cw.unstable.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/cycle_32t_2cw.unstable.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_4t_2cw.md:
--------------------------------------------------------------------------------
 1 | # Fluctlight Transformer Cyclic 4-Token Transformation (CW=2)
 2 | ## Training Information
 3 | The model successfully learned the expanded cyclic transformation task in five epochs:
 4 | - Training loss: 1.620
 5 | - Validation loss: 0.830
 6 | - Training files: `data/cycle_4t_2cw-train.txt`
 7 | - Validation files: `data/cycle_4t_2cw-val.txt`
 8 | - Test file: `data/cycle_4t_2cw-test.txt`
 9 | 
10 | ### Model Configuration
11 | | Parameter | Value |
12 | |-----------|-------|
13 | | Vocabulary size | 256 |
14 | | Embedding dimension (d_model) | 4 |
15 | | Attention heads | 2 |
16 | | Number of layers | 2 |
17 | | Feed-forward dimension | 8 |
18 | | Context window | 2 |
19 | | Dropout rate | 0.0078125 |
20 | | Total parameters | 2.7K |
21 | 
22 | ## Test Results Comparison
23 | 
24 | ### v_scale = 0.0 Results
25 | - Perfect accuracy (✅) across all 64 test cases
26 | - Zero errors for all pattern lengths
27 | - RMSE consistently 0.000
28 | - Successfully handles:
29 |   - Single token repetition (a→aaaa, b→bbbb, c→cccc, d→dddd)
30 |   - Two token alternation (ab→abab, ac→acac, bd→bdbd, etc.)
31 |   - All 16 possible two-token combinations
32 | 
33 | ### v_scale = 1.0 Results
34 | - Also achieves perfect accuracy by epoch 5
35 | - Training loss: 1.410 (better than v_scale=0.0)
36 | - Validation loss: 0.664 (better than v_scale=0.0)
37 | 
38 | ## Analysis
39 | ### Scaling Observations
40 | 1. **Token Count Impact**
41 |    - 4-token system requires more epochs (5) vs 2-token system (1)
42 |    - Higher final training loss (1.620 vs 1.450)
43 |    - Higher validation loss (0.830 vs 0.639)
44 | 
45 | 2. **V-Scale Comparison**
46 |    - v_scale=1.0 shows better convergence (train_loss: 1.410 vs 1.620)
47 |    - v_scale=1.0 achieves better validation loss (0.664 vs 0.830)
48 |    - Both settings eventually achieve perfect accuracy
49 | 
50 | ### Key Findings
51 | - Model successfully scales to 4x more pattern combinations
52 | - Maintains perfect accuracy despite increased complexity
53 | - RoPE v-scaling appears beneficial for larger token sets
54 | - Learning time increases linearly, not exponentially
55 | 
56 | ### Implications
57 | 1. **Architectural Efficiency**
58 |    - Same tiny architecture (2.7K params) handles 4x pattern space
59 |    - 2-token context window sufficient for larger vocabulary
60 |    - No architecture changes needed for increased token count
61 | 
62 | 2. **Training Dynamics**
63 |    - v-scaling becomes more relevant with increased token count
64 |    - Training complexity scales reasonably with token count
65 |    - Model demonstrates robust generalization across pattern types
66 | 
67 | This experiment shows that the minimal transformer architecture scales effectively to larger token sets while maintaining perfect pattern recognition, with v-scaling becoming more beneficial as pattern complexity increases.


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_4t_2cw.stable.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/cycle_4t_2cw.stable.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_4t_4cw.stable.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/cycle_4t_4cw.stable.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_8t_2cw.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/cycle_8t_2cw.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_8t_2cw.md:
--------------------------------------------------------------------------------
 1 | # Fluctlight Transformer Cyclic 8-Token Transformation (CW=2)
 2 | 
 3 | ## Training Information
 4 | The model demonstrated significantly different behavior based on RoPE v-scaling:
 5 | 
 6 | | Parameter | v_scale = 0.0 | v_scale = 1.0 |
 7 | |-----------|--------------|---------------|
 8 | | Training Loss | 2.020 | 1.730 |
 9 | | Validation Loss | 1.390 | 1.090 |
10 | | Epochs to Best | Failing through 151 | Succeeded at 11 |
11 | | Final Status | Failed | Passed |
12 | 
13 | ## Test Results Analysis
14 | 
15 | ### v_scale = 0.0 Performance
16 | - Failed to converge even after 399 epochs
17 | - Training loss stuck at 1.710
18 | - Validation loss plateaued at 1.180
19 | - Shows systematic failures in self-repetition patterns
20 | 
21 | #### Pattern Success Rates
22 | 1. Single Token Input (a-h → self):
23 |    - Success: 5/8 tokens (a,c,d,e,f)
24 |    - Failure: 3/8 tokens (b,g,h)
25 |    - Common error: Substituting alternating patterns
26 | 
27 | 2. Two Token Combinations:
28 |    - Success: Most alternating patterns (ab, ac, ad, etc.)
29 |    - Failure: Self-repetition patterns (bb, ff)
30 |    - RMSE Range: 0.000 - 4.243
31 | 
32 | ### v_scale = 1.0 Performance
33 | - Successfully converged at epoch 11
34 | - Achieved perfect accuracy on:
35 |   - All alternating patterns (ab, ac, ad, etc.)
36 |   - All self-repetition patterns
37 |   - All geometric progression lengths (1,2,4,8)
38 | 
39 | ## Key Findings
40 | 
41 | 1. **RoPE Scaling Impact**
42 |    - Critical for 8-token learning
43 |    - Enables stable pattern recognition
44 |    - Significantly improves convergence
45 | 
46 | 2. **Pattern Complexity**
47 |    - Self-repetition patterns harder than alternating
48 |    - Geometric progression maintained when pattern learned
49 |    - Context window (2) sufficient with proper v-scaling
50 | 
51 | 3. **Error Characteristics**
52 |    - Without v-scaling: Tends to substitute alternating patterns
53 |    - With v-scaling: Clean convergence on all pattern types
54 |    - RMSE values cluster around specific error types
55 | 
56 | ## Conclusions
57 | 
58 | 1. **Scaling Necessity**
59 |    - RoPE v-scaling becomes essential at 8 tokens
60 |    - Critical for pattern stability and convergence
61 |    - Enables learning with minimal architecture
62 | 
63 | 2. **Architecture Sufficiency**
64 |    - 2-token context window remains adequate
65 |    - Same minimal architecture works with proper scaling
66 |    - No need for additional capacity
67 | 
68 | 3. **Learning Dynamics**
69 |    - Clear phase transition with v-scaling
70 |    - Pattern hierarchy emerges in learning
71 |    - Alternating patterns learned before self-repetition
72 | 
73 | This experiment demonstrates that RoPE v-scaling becomes crucial as the token space expands, enabling successful learning without architectural changes.


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_8t_2cw.stable.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/cycle_8t_2cw.stable.ckpt


--------------------------------------------------------------------------------
/code/fl/docs/results/cycle_8t_2cw.unstable.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/docs/results/cycle_8t_2cw.unstable.ckpt


--------------------------------------------------------------------------------
/code/fl/fluctlight/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Fluctlight - A minimal Transformer implementation with Rotary Positional Embeddings (RoPE).
3 | """
4 | 
5 | __version__ = "0.1.0"
6 | 


--------------------------------------------------------------------------------
/code/fl/fluctlight/visualization.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Visualization utilities for Fluctlight model analysis.
  3 | 
  4 | This module provides functions for generating and saving visualization plots
  5 | for model test results and training metrics.
  6 | """
  7 | 
  8 | import os
  9 | from typing import Optional
 10 | import pandas as pd
 11 | import matplotlib.pyplot as plt
 12 | import seaborn as sns
 13 | from pathlib import Path
 14 | 
 15 | def visualize_test_results(
 16 |     csv_file: str,
 17 |     output_dir: Optional[str] = None,
 18 |     save_plots: bool = True
 19 | ) -> None:
 20 |     """
 21 |     Generate visualization plots for test results.
 22 |     
 23 |     Args:
 24 |         csv_file: Path to CSV file containing test results
 25 |         output_dir: Directory to save plots (default: tmp/visualizations)
 26 |         save_plots: Whether to save plots to files (default: True)
 27 |     """
 28 |     # Create output directory if needed
 29 |     if output_dir is None:
 30 |         output_dir = os.path.join("tmp", "visualizations")
 31 |     os.makedirs(output_dir, exist_ok=True)
 32 |     
 33 |     # Read and process data
 34 |     df = pd.read_csv(csv_file, names=['Input', 'Expected', 'Actual', 'Errors', 'RMSE'])
 35 |     df['RMSE'] = pd.to_numeric(df['RMSE'])
 36 |     df['Errors'] = pd.to_numeric(df['Errors'])
 37 |     
 38 |     # Set style - using a built-in style that's guaranteed to exist
 39 |     plt.style.use('default')
 40 |     sns.set_theme()  # This will apply seaborn's styling
 41 |     
 42 |     # 1. Heatmap of RMSE error distribution
 43 |     plt.figure(figsize=(10, 6))
 44 |     heatmap_data = df.pivot_table(index="Input", values="RMSE", aggfunc="mean")
 45 |     sns.heatmap(heatmap_data, annot=True, cmap="coolwarm", linewidths=0.5)
 46 |     plt.title("Error Distribution (RMSE per Input Pattern)")
 47 |     plt.ylabel("Input Pattern")
 48 |     plt.xlabel("Test Cases")
 49 |     if save_plots:
 50 |         plt.savefig(os.path.join(output_dir, "error_distribution.png"))
 51 |     plt.close()
 52 |     
 53 |     # 2. Scatter plot of expected vs. actual output
 54 |     plt.figure(figsize=(8, 6))
 55 |     sns.scatterplot(
 56 |         x=df["Expected"], 
 57 |         y=df["Actual"], 
 58 |         hue=df["Errors"], 
 59 |         palette="coolwarm", 
 60 |         edgecolor="gray"
 61 |     )
 62 |     plt.plot(df["Expected"], df["Expected"], "r--", label="Ideal Prediction Line")
 63 |     plt.xlabel("Expected Output")
 64 |     plt.ylabel("Predicted Output")
 65 |     plt.title("Expected vs. Predicted Outputs")
 66 |     plt.legend()
 67 |     if save_plots:
 68 |         plt.savefig(os.path.join(output_dir, "prediction_scatter.png"))
 69 |     plt.close()
 70 |     
 71 |     # 3. Line plot of sequence evolution
 72 |     df_sorted = df.sort_values("Errors")
 73 |     plt.figure(figsize=(10, 5))
 74 |     plt.plot(
 75 |         df_sorted.index, 
 76 |         df_sorted["RMSE"], 
 77 |         marker="o", 
 78 |         linestyle="-", 
 79 |         color="blue", 
 80 |         label="RMSE Trend"
 81 |     )
 82 |     plt.xlabel("Test Cases")
 83 |     plt.ylabel("RMSE")
 84 |     plt.title("Error Evolution Across Test Cases")
 85 |     plt.legend()
 86 |     plt.grid()
 87 |     if save_plots:
 88 |         plt.savefig(os.path.join(output_dir, "error_evolution.png"))
 89 |     plt.close()
 90 |     
 91 |     # 4. Summary statistics plot
 92 |     plt.figure(figsize=(8, 6))
 93 |     stats = {
 94 |         'Total Tests': len(df),
 95 |         'Passed': len(df[df['Errors'] == 0]),
 96 |         'Failed': len(df[df['Errors'] > 0]),
 97 |         'Avg RMSE': df['RMSE'].mean(),
 98 |         'Max RMSE': df['RMSE'].max(),
 99 |         'Min RMSE': df['RMSE'].min()
100 |     }
101 |     plt.bar(stats.keys(), stats.values())
102 |     plt.xticks(rotation=45)
103 |     plt.title("Test Results Summary")
104 |     plt.tight_layout()
105 |     if save_plots:
106 |         plt.savefig(os.path.join(output_dir, "summary_stats.png"))
107 |     plt.close()
108 | 


--------------------------------------------------------------------------------
/code/fl/lightning_logs/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore all files in this directory
2 | *
3 | 
4 | # Except for this .gitignore file
5 | !.gitignore
6 | # And the README
7 | !README.md
8 | 


--------------------------------------------------------------------------------
/code/fl/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "fluctlight"
 3 | version = "0.1.0"
 4 | description = "A minimal Transformer implementation with Rotary Positional Embeddings (RoPE)"
 5 | requires-python = ">=3.11"
 6 | dependencies = [
 7 |     "pytest>=8.3.4",
 8 |     "pytorch-lightning>=2.5.0.post0",
 9 |     "rich>=13.9.4",
10 |     "tensorboard>=2.19.0",
11 |     "torch>=2.6.0",
12 |     "trafilatura>=2.0.0",
13 | ]
14 | 
15 | [tool.setuptools]
16 | packages = ["fluctlight"]
17 | 
18 | [[tool.uv.index]]
19 | explicit = true
20 | name = "pytorch-cpu"
21 | url = "https://download.pytorch.org/whl/cpu"
22 | 
23 | [tool.uv.sources]
24 | torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }]
25 | torchvision = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }]
26 | 


--------------------------------------------------------------------------------
/code/fl/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | pytorch-lightning
3 | rich
4 | tensorboard
5 | trafilatura
6 | pandas>=2.0.0
7 | matplotlib>=3.7.0
8 | seaborn>=0.12.0
9 | 


--------------------------------------------------------------------------------
/code/fl/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Test suite for the minimal Transformer implementation.
3 | """
4 | 


--------------------------------------------------------------------------------
/code/fl/tests/test_dataset.py:
--------------------------------------------------------------------------------
  1 | """Tests for dataset handling and loading."""
  2 | 
  3 | import torch
  4 | import pytest
  5 | from pathlib import Path
  6 | from fluctlight.dataset import (
  7 |     Base64Dataset, 
  8 |     create_dataloader,
  9 |     get_num_cpu_workers,
 10 |     collate_sequences
 11 | )
 12 | 
 13 | def test_base64_dataset_basic():
 14 |     """Test basic dataset functionality with a simple example."""
 15 |     # Create a temporary file with test data
 16 |     test_data = "aGVsbG8=\td29ybGQ=\n"  # "hello\tworld" in base64
 17 |     tmp_path = Path("test_data.txt")
 18 |     tmp_path.write_text(test_data)
 19 |     
 20 |     try:
 21 |         dataset = Base64Dataset(tmp_path, device=torch.device('cpu'))
 22 |         assert len(dataset) == 1
 23 |         
 24 |         # Check first item
 25 |         input_seq, target_seq = dataset[0]
 26 |         assert isinstance(input_seq, torch.Tensor)
 27 |         assert isinstance(target_seq, torch.Tensor)
 28 |         assert input_seq.device == torch.device('cpu')
 29 |         assert target_seq.device == torch.device('cpu')
 30 |         
 31 |     finally:
 32 |         tmp_path.unlink()  # Clean up
 33 | 
 34 | def test_base64_dataset_prepend():
 35 |     """Test dataset with prepended data."""
 36 |     test_data = "aGVsbG8=\td29ybGQ=\n"
 37 |     prepend = ["dGVzdA==\tZGF0YQ=="]  # "test\tdata" in base64
 38 |     
 39 |     tmp_path = Path("test_data.txt")
 40 |     tmp_path.write_text(test_data)
 41 |     
 42 |     try:
 43 |         dataset = Base64Dataset(
 44 |             tmp_path,
 45 |             device=torch.device('cpu'),
 46 |             prepend=prepend
 47 |         )
 48 |         assert len(dataset) == 2  # One prepended + one from file
 49 |         
 50 |     finally:
 51 |         tmp_path.unlink()
 52 | 
 53 | def test_collate_sequences():
 54 |     """Test sequence collation and padding."""
 55 |     # Create sequences of different lengths
 56 |     seq1 = torch.tensor([1, 2, 3])
 57 |     seq2 = torch.tensor([4, 5])
 58 |     batch = [(seq1, seq1), (seq2, seq2)]
 59 |     
 60 |     # Collate
 61 |     input_padded, target_padded = collate_sequences(batch)
 62 |     
 63 |     # Check shapes
 64 |     assert input_padded.shape == (2, 3)  # Padded to longest sequence
 65 |     assert target_padded.shape == (2, 3)
 66 |     
 67 |     # Check padding
 68 |     assert torch.all(input_padded[0] == seq1)
 69 |     assert torch.all(input_padded[1, :2] == seq2)
 70 |     assert input_padded[1, 2] == 0  # Padding
 71 | 
 72 | def test_dataloader_creation():
 73 |     """Test dataloader configuration."""
 74 |     test_data = "aGVsbG8=\td29ybGQ=\n"
 75 |     tmp_path = Path("test_data.txt")
 76 |     tmp_path.write_text(test_data)
 77 |     
 78 |     try:
 79 |         dataset = Base64Dataset(tmp_path, device=torch.device('cpu'))
 80 |         dataloader = create_dataloader(
 81 |             dataset,
 82 |             batch_size=2,
 83 |             num_workers=0,  # Force single process for testing
 84 |             pin_memory=False
 85 |         )
 86 |         
 87 |         assert dataloader.batch_size == 2
 88 |         assert dataloader.num_workers == 0
 89 |         
 90 |     finally:
 91 |         tmp_path.unlink()
 92 | 
 93 | def test_num_cpu_workers():
 94 |     """Test CPU worker count calculation."""
 95 |     workers = get_num_cpu_workers(reserved_workers=1)
 96 |     assert workers >= 1
 97 |     assert isinstance(workers, int)
 98 | 
 99 | def test_invalid_base64():
100 |     """Test handling of invalid base64 data."""
101 |     test_data = "invalid base64!\tinvalid\n"
102 |     tmp_path = Path("test_data.txt")
103 |     tmp_path.write_text(test_data)
104 |     
105 |     try:
106 |         with pytest.raises(Exception):  # Should raise on invalid base64
107 |             dataset = Base64Dataset(tmp_path, device=torch.device('cpu'))
108 |             _ = dataset[0]  # Try to access data
109 |     finally:
110 |         tmp_path.unlink()
111 | 


--------------------------------------------------------------------------------
/code/fl/tests/test_device.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Test device handling and tensor placement in the model and dataset.
 3 | """
 4 | 
 5 | import torch
 6 | from fluctlight.model import FluctlightTransformer, get_default_device
 7 | from fluctlight.dataset import Base64Dataset
 8 | 
 9 | def test_device_detection():
10 |     """Test automatic device detection and tensor placement."""
11 |     # Get default device
12 |     device = get_default_device()
13 |     
14 |     # Initialize model
15 |     model = FluctlightTransformer(device=device)
16 |     assert model.device == device, "Model not on correct device"
17 |     
18 |     # Create a sample input
19 |     x = torch.randint(0, 256, (1, 10), device=device)
20 |     
21 |     # Forward pass
22 |     with torch.no_grad():
23 |         output = model(x)
24 |     
25 |     # Verify all tensors are on same device
26 |     assert x.device == model.device, "Input tensor not on model device"
27 |     assert output.device == model.device, "Output tensor not on model device"
28 | 
29 | def test_explicit_device_placement():
30 |     """Test explicit device placement works."""
31 |     cpu_device = torch.device('cpu')
32 |     model = FluctlightTransformer(device=cpu_device)
33 |     
34 |     # Verify model is on CPU
35 |     assert model.device == cpu_device
36 |     assert next(model.parameters()).device == cpu_device
37 |     
38 |     # Test forward pass maintains device
39 |     x = torch.randint(0, 256, (1, 10), device=cpu_device)
40 |     with torch.no_grad():
41 |         output = model(x)
42 |     assert output.device == cpu_device
43 | 
44 | if __name__ == "__main__":
45 |     test_device_detection()
46 |     test_explicit_device_placement()
47 | 


--------------------------------------------------------------------------------
/code/fl/tests/test_model.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Tests for the minimal Transformer model.
  3 | """
  4 | 
  5 | import torch
  6 | import pytest
  7 | 
  8 | from fluctlight.model import FluctlightTransformer
  9 | 
 10 | def test_model_initialization():
 11 |     """Test that model initializes correctly."""
 12 |     model = FluctlightTransformer()
 13 |     
 14 |     assert model.vocab_size == 256
 15 |     assert model.d_model == 4
 16 |     assert model.n_heads == 2
 17 |     assert model.n_layers == 2
 18 |     assert model.head_dim == 2
 19 | 
 20 | def test_model_forward():
 21 |     """Test model forward pass."""
 22 |     model = FluctlightTransformer()
 23 |     
 24 |     # Create sample input
 25 |     batch_size = 2
 26 |     seq_len = 10
 27 |     x = torch.randint(0, 256, (batch_size, seq_len))
 28 |     
 29 |     # Forward pass
 30 |     output = model(x)
 31 |     
 32 |     # Check output shape
 33 |     assert output.shape == (batch_size, seq_len, model.vocab_size)
 34 | 
 35 | def test_rope_application():
 36 |     """Test Rotary Positional Embedding."""
 37 |     model = FluctlightTransformer()
 38 |     
 39 |     # Create sample queries and keys
 40 |     batch_size = 2
 41 |     seq_len = 8
 42 |     n_heads = 2
 43 |     head_dim = 2
 44 |     
 45 |     q = torch.randn(batch_size, n_heads, seq_len, head_dim)
 46 |     k = torch.randn(batch_size, n_heads, seq_len, head_dim)
 47 |     
 48 |     # Apply RoPE
 49 |     q_rotated, k_rotated = model._apply_rope(q, k, seq_len)
 50 |     
 51 |     # Check shapes
 52 |     assert q_rotated.shape == q.shape
 53 |     assert k_rotated.shape == k.shape
 54 |     
 55 |     # Check that different positions have different rotations
 56 |     pos1 = q_rotated[0, 0, 0]  # First position
 57 |     pos2 = q_rotated[0, 0, 1]  # Second position
 58 |     assert not torch.allclose(pos1, pos2)
 59 | 
 60 | def test_causal_attention_mask():
 61 |     """Test that causal masking works correctly."""
 62 |     model = FluctlightTransformer()
 63 |     
 64 |     # Create sample sequence
 65 |     batch_size = 1
 66 |     seq_len = 4
 67 |     x = torch.randint(0, 256, (batch_size, seq_len))
 68 |     
 69 |     # Get attention scores (extract from forward pass)
 70 |     with torch.no_grad():
 71 |         h = model.token_embed(x)
 72 |         layer = model.layers[0]
 73 |         
 74 |         q = layer["Wq"](h)
 75 |         k = layer["Wk"](h)
 76 |         v = layer["Wv"](h)
 77 |         
 78 |         q = q.view(batch_size, seq_len, model.n_heads, model.head_dim).permute(0, 2, 1, 3)
 79 |         k = k.view(batch_size, seq_len, model.n_heads, model.head_dim).permute(0, 2, 1, 3)
 80 |         
 81 |         q, k = model._apply_rope(q, k, seq_len)
 82 |         attn_scores = torch.matmul(q, k.transpose(-2, -1)) / (model.head_dim ** 0.5)
 83 |         
 84 |         # Check that future positions are masked
 85 |         mask = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()
 86 |         assert torch.all(attn_scores.masked_select(mask) == float('-inf'))
 87 | 
 88 | def test_model_init():
 89 |     """Test model initialization with default parameters."""
 90 |     model = FluctlightTransformer(device=torch.device('cpu'))  # Force CPU
 91 |     assert model.vocab_size == 256
 92 |     assert model.d_model == 4
 93 |     assert model.n_heads == 2
 94 |     assert model.head_dim == 2
 95 |     assert len(model.layers) == 2
 96 | 
 97 | def test_context_window():
 98 |     """Test context window enforcement."""
 99 |     model = FluctlightTransformer(device=torch.device('cpu'))
100 |     x = torch.randint(0, 256, (1, 70))
101 |     output = model(x)
102 |     assert output.shape[1] == 63  # MAX_CONTEXT - 1
103 | 
104 | def test_rope_application():
105 |     """Test RoPE transformation."""
106 |     model = FluctlightTransformer(device=torch.device('cpu'))
107 |     q = torch.randn(1, 2, 4, 2)
108 |     k = torch.randn(1, 2, 4, 2)
109 |     v = torch.randn(1, 2, 4, 2)
110 |     q_rope, k_rope, v_rope = model._apply_rope(q, k, v, 4)
111 |     assert q_rope.shape == q.shape
112 |     assert k_rope.shape == k.shape
113 |     assert v_rope.shape == v.shape
114 |     assert not torch.allclose(q, q_rope)
115 |     assert not torch.allclose(k, k_rope)
116 |     assert not torch.allclose(v, v_rope)
117 | 
118 | def test_forward_shape():
119 |     """Test forward pass shapes."""
120 |     model = FluctlightTransformer(device=torch.device('cpu'))
121 |     x = torch.randint(0, 256, (2, 10))
122 |     output = model(x)
123 |     assert output.shape == (2, 10, 256)
124 | 
125 | def test_training_step():
126 |     """Test training step with sequence shifting."""
127 |     model = FluctlightTransformer(device=torch.device('cpu'))
128 |     input_seq = torch.randint(0, 256, (2, 10))
129 |     target_seq = torch.randint(0, 256, (2, 10))
130 |     loss = model.training_step((input_seq, target_seq), 0)
131 |     assert isinstance(loss, torch.Tensor)
132 |     assert loss.ndim == 0  # scalar loss
133 | 


--------------------------------------------------------------------------------
/code/fl/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | """Tests for utility functions."""
 2 | 
 3 | import pytest
 4 | from fluctlight.utils import decode_base64_pair
 5 | 
 6 | def test_decode_base64_pair():
 7 |     """Test basic base64 decoding."""
 8 |     encoded = "aGVsbG8=\td2F2ZQ=="  # "hello\twave" in base64
 9 |     input_str, output_str = decode_base64_pair(encoded)
10 |     assert input_str == b"hello"
11 |     assert output_str == b"wave"
12 | 
13 | def test_decode_base64_pair_with_padding():
14 |     """Test base64 decoding with different padding lengths."""
15 |     encoded = "YQ==\tYmM="  # "a\tbc" in base64
16 |     input_str, output_str = decode_base64_pair(encoded)
17 |     assert input_str == b"a"
18 |     assert output_str == b"bc"
19 | 
20 | def test_decode_base64_pair_invalid():
21 |     """Test error handling for invalid base64."""
22 |     with pytest.raises(ValueError):
23 |         decode_base64_pair("invalid base64!")


--------------------------------------------------------------------------------
/code/fl/tmp/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcotten-scrypted/autonomous-virtual-beings/2688592df0896df8d2466fe71b6f8efa3ed450f8/code/fl/tmp/.gitkeep


--------------------------------------------------------------------------------
/code/fl/training/cycle_16t_2cw.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import base64
 3 | import itertools
 4 | 
 5 | def generate_cycle_2cw_dataset(num_samples, output_file):
 6 |     # Full alphabet for 16 tokens
 7 |     tokens = list('abcdefghijklmnop')
 8 |     
 9 |     # Generate patterns
10 |     patterns = []
11 |     
12 |     # Single character preservation
13 |     for token in tokens:
14 |         patterns.append((bytes(token, 'utf-8'), bytes(token, 'utf-8')))
15 |     
16 |     # Same character pair preservation
17 |     for token in tokens:
18 |         patterns.append((bytes(token*2, 'utf-8'), bytes(token*2, 'utf-8')))
19 |     
20 |     # Cyclic transformations
21 |     for i, token1 in enumerate(tokens):
22 |         for j, token2 in enumerate(tokens):
23 |             if i != j:
24 |                 patterns.append(
25 |                     (bytes(token1 + token2, 'utf-8'), 
26 |                      bytes(token2 + token1, 'utf-8'))
27 |                 )
28 |     
29 |     # Expand patterns to increase dataset size
30 |     expanded_patterns = []
31 |     for input_bytes, target_bytes in patterns:
32 |         for _ in range(num_samples // len(patterns)):
33 |             expanded_patterns.append((input_bytes, target_bytes))
34 |     
35 |     random.shuffle(expanded_patterns)
36 |     
37 |     # Write dataset
38 |     with open(output_file, 'w') as f:
39 |         for input_bytes, target_bytes in expanded_patterns:
40 |             input_b64 = base64.b64encode(input_bytes).decode('utf-8')
41 |             target_b64 = base64.b64encode(target_bytes).decode('utf-8')
42 |             f.write(f"{input_b64}\t{target_b64}\n")
43 | 
44 | # Generate train and validation datasets
45 | generate_cycle_2cw_dataset(10000, '../data/cycle_16t_2cw-train.txt')
46 | generate_cycle_2cw_dataset(2000, '../data/cycle_16t_2cw-val.txt')
47 | 


--------------------------------------------------------------------------------
/code/fl/training/cycle_2t_2cw.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import base64
 3 | import itertools
 4 | 
 5 | def generate_cycle_2cw_dataset(num_samples, output_file):
 6 |     patterns = [
 7 |         # Single character (no change)
 8 |         (b'a', b'a'),
 9 |         (b'b', b'b'),
10 |         
11 |         # Same character pairs (no change)
12 |         (b'aa', b'aa'),
13 |         (b'bb', b'bb'),
14 |         
15 |         # Swapped pairs
16 |         (b'ab', b'ba'),
17 |         (b'ba', b'ab')
18 |     ]
19 |     
20 |     # Expand patterns to increase dataset size
21 |     expanded_patterns = []
22 |     for input_bytes, target_bytes in patterns:
23 |         for _ in range(num_samples // len(patterns)):
24 |             expanded_patterns.append((input_bytes, target_bytes))
25 |     
26 |     random.shuffle(expanded_patterns)
27 |     
28 |     # Write dataset
29 |     with open(output_file, 'w') as f:
30 |         for input_bytes, target_bytes in expanded_patterns:
31 |             input_b64 = base64.b64encode(input_bytes).decode('utf-8')
32 |             target_b64 = base64.b64encode(target_bytes).decode('utf-8')
33 |             f.write(f"{input_b64}\t{target_b64}\n")
34 | 
35 | # Generate train and validation datasets
36 | generate_cycle_2cw_dataset(10000, '../data/cycle_2cw-train.txt')
37 | generate_cycle_2cw_dataset(2000, '../data/cycle_2cw-val.txt')
38 | 


--------------------------------------------------------------------------------
/code/fl/training/cycle_2t_4cw.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import base64
  3 | from typing import List, Tuple, Set
  4 | from itertools import product
  5 | from collections import deque
  6 | 
  7 | def generate_next_sequence(input_str: str, symbol_set: Set[str]) -> str:
  8 |     """
  9 |     Generate the next sequence by applying a one-token shift to the input.
 10 |     This is the core pattern transformation that simulates how tokens move
 11 |     through a context window:
 12 |     
 13 |     For example, with symbol_set = {'a', 'b'}:
 14 |     "a" -> "a"        (single token maps to itself)
 15 |     "b" -> "b"        (single token maps to itself)
 16 |     "ab" -> "ba"      (shift tokens)
 17 |     "aba" -> "baa"    (shift tokens)
 18 |     "abab" -> "baba"  (shift tokens)
 19 |     
 20 |     Args:
 21 |         input_str: Input sequence
 22 |         symbol_set: Set of valid symbols/tokens
 23 |         
 24 |     Returns:
 25 |         The next sequence in the pattern
 26 |     """
 27 |     # For empty strings or None, return empty string
 28 |     if not input_str:
 29 |         return ""
 30 |     
 31 |     # For single tokens, return identity
 32 |     if len(input_str) == 1:
 33 |         return input_str
 34 |     
 35 |     # For homogeneous sequences (all same token), return the same sequence
 36 |     if len(set(input_str)) == 1:
 37 |         return input_str
 38 |     
 39 |     # For multi-token sequences, apply one-token shift
 40 |     # Remove first token and append it to the end
 41 |     return input_str[1:] + input_str[0]
 42 | 
 43 | def generate_all_patterns(symbol_set: Set[str], max_length: int = 4) -> List[Tuple[str, str]]:
 44 |     """
 45 |     Generate all possible input/output patterns up to max_length,
 46 |     applying the token shift transformation.
 47 |     
 48 |     Args:
 49 |         symbol_set: Set of symbols to use (e.g., {'a', 'b'})
 50 |         max_length: Maximum length of inputs to generate
 51 |         
 52 |     Returns:
 53 |         List of (input, output) pattern tuples
 54 |     """
 55 |     patterns = []
 56 |     symbols = list(symbol_set)  # Convert set to list for product()
 57 |     
 58 |     # Generate patterns for each length 1 to max_length
 59 |     for length in range(1, max_length + 1):
 60 |         # Generate all possible combinations of tokens at this length
 61 |         for combo in product(symbols, repeat=length):
 62 |             input_str = ''.join(combo)
 63 |             
 64 |             # Generate output by applying the token shift transformation
 65 |             output_str = generate_next_sequence(input_str, symbol_set)
 66 |             
 67 |             # Add to patterns
 68 |             patterns.append((input_str, output_str))
 69 |             
 70 |     return patterns
 71 | 
 72 | def generate_dataset(patterns: List[Tuple[str, str]], num_samples: int, output_file: str) -> int:
 73 |     """
 74 |     Generate a dataset file with the specified number of samples.
 75 |     
 76 |     Args:
 77 |         patterns: List of (input, output) pattern tuples
 78 |         num_samples: Number of samples to generate
 79 |         output_file: Path to output file
 80 |         
 81 |     Returns:
 82 |         Number of samples generated
 83 |     """
 84 |     # Sample patterns with replacement to ensure we get exactly the requested number
 85 |     sampled_patterns = []
 86 |     
 87 |     for _ in range(num_samples):
 88 |         sampled_patterns.append(random.choice(patterns))
 89 |     
 90 |     # Shuffle the samples
 91 |     random.shuffle(sampled_patterns)
 92 |     
 93 |     # Write to file
 94 |     with open(output_file, 'w') as f:
 95 |         for input_str, output_str in sampled_patterns:
 96 |             # Convert to bytes then base64
 97 |             input_b64 = base64.b64encode(input_str.encode()).decode()
 98 |             output_b64 = base64.b64encode(output_str.encode()).decode()
 99 |             f.write(f"{input_b64}\t{output_b64}\n")
100 |     
101 |     return len(sampled_patterns)
102 | 
103 | def verify_patterns(patterns: List[Tuple[str, str]], expected_patterns: List[Tuple[str, str]]) -> bool:
104 |     """
105 |     Verify that generated patterns match expected patterns.
106 |     
107 |     Args:
108 |         patterns: Generated patterns
109 |         expected_patterns: Expected patterns
110 |         
111 |     Returns:
112 |         True if all expected patterns are found, False otherwise
113 |     """
114 |     pattern_dict = {input_str: output_str for input_str, output_str in patterns}
115 |     
116 |     all_found = True
117 |     for input_str, expected_output in expected_patterns:
118 |         if input_str in pattern_dict:
119 |             actual_output = pattern_dict[input_str]
120 |             if actual_output != expected_output:
121 |                 print(f"Pattern mismatch for '{input_str}': Expected '{expected_output}', got '{actual_output}'")
122 |                 all_found = False
123 |         else:
124 |             print(f"Pattern not found: '{input_str}' -> '{expected_output}'")
125 |             all_found = False
126 |     
127 |     return all_found
128 | 
129 | def main() -> None:
130 |     """
131 |     Generate training and validation datasets using the one-token shift pattern transformation.
132 |     The generator handles arbitrary symbol sets, defaulting to {'a', 'b'}.
133 |     """
134 |     # Set random seed for reproducibility
135 |     random.seed(42)
136 |     
137 |     # Configuration
138 |     symbol_set = {'a', 'b'}  # Can be extended to more symbols
139 |     max_length = 4
140 |     train_samples = 10000
141 |     val_samples = 2000
142 |     
143 |     # Generate all patterns
144 |     patterns = generate_all_patterns(symbol_set, max_length)
145 |     
146 |     # Expected patterns for verification
147 |     expected_patterns = [
148 |         ("a", "a"),
149 |         ("b", "b"),
150 |         ("ab", "ba"),
151 |         ("ba", "ab"),
152 |         ("aab", "aba"),
153 |         ("aba", "baa"),
154 |         ("abb", "bba"),
155 |         ("baa", "aab"),
156 |         ("bab", "aba"),
157 |         ("bba", "bab"),
158 |         ("bbb", "bbb"),
159 |         ("aaaa", "aaaa"),
160 |         ("aaab", "aaba"),
161 |         ("aaba", "abaa"),
162 |         ("aabb", "abba"),
163 |         ("abaa", "baaa"),
164 |         ("abab", "baba"),
165 |         ("abba", "bbaa"),
166 |         ("abbb", "bbba"),
167 |         ("baaa", "aaab"),
168 |         ("baab", "aabb"),
169 |         ("baba", "abab"),
170 |         ("babb", "abbb"),
171 |         ("bbaa", "baab"),
172 |         ("bbab", "babb"),
173 |         ("bbba", "bbab"),
174 |         ("bbbb", "bbbb")
175 |     ]
176 |     
177 |     # Verify patterns match expected outputs
178 |     print("Verifying pattern generation...")
179 |     verification_result = verify_patterns(patterns, expected_patterns)
180 |     if verification_result:
181 |         print("✅ All patterns verified correctly!")
182 |     else:
183 |         print("❌ Pattern verification failed!")
184 |     
185 |     # Count patterns by input length
186 |     length_counts = {}
187 |     for input_str, _ in patterns:
188 |         length = len(input_str)
189 |         length_counts[length] = length_counts.get(length, 0) + 1
190 |     
191 |     # Print statistics about patterns
192 |     print("\nPattern counts by input length:")
193 |     for length in range(1, max_length + 1):
194 |         print(f"  Length {length}: {length_counts.get(length, 0)} patterns")
195 |     print(f"  Total: {len(patterns)} patterns")
196 |     
197 |     # Print examples of patterns for each length
198 |     print("\nPattern examples by length:")
199 |     for length in range(1, max_length + 1):
200 |         examples = [(inp, out) for inp, out in patterns if len(inp) == length][:5]  # Show up to 5 examples
201 |         if examples:
202 |             print(f"\n  Length {length} examples:")
203 |             for input_str, output_str in examples:
204 |                 print(f"    '{input_str}' -> '{output_str}'")
205 |     
206 |     # Generate datasets
207 |     print("\nGenerating datasets...")
208 |     train_count = generate_dataset(patterns, train_samples, 'data/cycle_2t_4cw-train.txt')
209 |     val_count = generate_dataset(patterns, val_samples, 'data/cycle_2t_4cw-val.txt')
210 |     
211 |     # Print dataset statistics
212 |     print(f"  Training dataset: {train_count} samples (target: {train_samples})")
213 |     print(f"  Validation dataset: {val_count} samples (target: {val_samples})")
214 |     
215 |     print("\nDataset generation complete!")
216 | 
217 | if __name__ == "__main__":
218 |     main()
219 | 


--------------------------------------------------------------------------------
/code/fl/training/cycle_32t_2cw.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import base64
 3 | import string
 4 | import itertools
 5 | 
 6 | def generate_cycle_2cw_dataset(num_samples, output_file):
 7 |     # 26 lowercase + 6 uppercase = 32 tokens
 8 |     lowercase = list(string.ascii_lowercase)
 9 |     uppercase = list(string.ascii_uppercase)
10 |     tokens = lowercase + uppercase
11 |     
12 |     # Generate patterns
13 |     patterns = []
14 |     
15 |     # Single character preservation
16 |     for token in tokens:
17 |         patterns.append((bytes(token, 'utf-8'), bytes(token, 'utf-8')))
18 |     
19 |     # Same character pair preservation
20 |     for token in tokens:
21 |         patterns.append((bytes(token*2, 'utf-8'), bytes(token*2, 'utf-8')))
22 |     
23 |     # Cyclic transformations
24 |     for i, token1 in enumerate(tokens):
25 |         for j, token2 in enumerate(tokens):
26 |             if i != j:
27 |                 patterns.append(
28 |                     (bytes(token1 + token2, 'utf-8'), 
29 |                      bytes(token2 + token1, 'utf-8'))
30 |                 )
31 |     
32 |     # Expand patterns to increase dataset size
33 |     expanded_patterns = []
34 |     for input_bytes, target_bytes in patterns:
35 |         for _ in range(num_samples // len(patterns)):
36 |             expanded_patterns.append((input_bytes, target_bytes))
37 |     
38 |     random.shuffle(expanded_patterns)
39 |     
40 |     # Write dataset
41 |     with open(output_file, 'w') as f:
42 |         for input_bytes, target_bytes in expanded_patterns:
43 |             input_b64 = base64.b64encode(input_bytes).decode('utf-8')
44 |             target_b64 = base64.b64encode(target_bytes).decode('utf-8')
45 |             f.write(f"{input_b64}\t{target_b64}\n")
46 | 
47 | # Generate train and validation datasets
48 | generate_cycle_2cw_dataset(20000, '../data/cycle_32t_2cw-train.txt')
49 | generate_cycle_2cw_dataset(4000, '../data/cycle_32t_2cw-val.txt')
50 | 


--------------------------------------------------------------------------------
/code/fl/training/cycle_4t_2cw.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import base64
 3 | import itertools
 4 | 
 5 | def generate_cycle_2cw_dataset(num_samples, output_file):
 6 |     patterns = [
 7 |         # Single characters (no change)
 8 |         (b'a', b'a'),
 9 |         (b'b', b'b'),
10 |         (b'c', b'c'),
11 |         (b'd', b'd'),
12 | 
13 |         # Same character pairs (no change)
14 |         (b'aa', b'aa'),
15 |         (b'bb', b'bb'),
16 |         (b'cc', b'cc'),
17 |         (b'dd', b'dd'),
18 | 
19 |         # Cycle for 4-character transformation
20 |         (b'ab', b'ba'),
21 |         (b'ac', b'ca'),
22 |         (b'ad', b'da'),
23 |         (b'ba', b'ab'),
24 |         (b'bc', b'cb'),
25 |         (b'bd', b'db'),
26 |         (b'ca', b'ac'),
27 |         (b'cb', b'bc'),
28 |         (b'cd', b'dc'),
29 |         (b'dc', b'cd'),
30 |         (b'da', b'ad'),
31 |         (b'ad', b'da')
32 |     ]
33 | 
34 |     # Expand patterns to increase dataset size
35 |     expanded_patterns = []
36 |     for input_bytes, target_bytes in patterns:
37 |         for _ in range(num_samples // len(patterns)):
38 |             expanded_patterns.append((input_bytes, target_bytes))
39 |     
40 |     random.shuffle(expanded_patterns)
41 | 
42 |     # Write dataset
43 |     with open(output_file, 'w') as f:
44 |         for input_bytes, target_bytes in expanded_patterns:
45 |             input_b64 = base64.b64encode(input_bytes).decode('utf-8')
46 |             target_b64 = base64.b64encode(target_bytes).decode('utf-8')
47 |             f.write(f"{input_b64}\t{target_b64}\n")
48 | 
49 | # Generate train and validation datasets
50 | generate_cycle_2cw_dataset(10000, '../data/cycle_4t_2cw-train.txt')
51 | generate_cycle_2cw_dataset(2000, '../data/cycle_4t_2cw-val.txt')
52 | 


--------------------------------------------------------------------------------
/code/fl/training/cycle_8t_2cw.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import base64
  3 | 
  4 | def generate_cycle_2cw_dataset(num_samples, output_file):
  5 |     patterns = [
  6 |         # Single characters (no change)
  7 |         (b'a', b'a'),
  8 |         (b'b', b'b'),
  9 |         (b'c', b'c'),
 10 |         (b'd', b'd'),
 11 |         (b'e', b'e'),
 12 |         (b'f', b'f'),
 13 |         (b'g', b'g'),
 14 |         (b'h', b'h'),
 15 | 
 16 |         # Same character pairs (no change)
 17 |         (b'aa', b'aa'),
 18 |         (b'bb', b'bb'),
 19 |         (b'cc', b'cc'),
 20 |         (b'dd', b'dd'),
 21 |         (b'ee', b'ee'),
 22 |         (b'ff', b'ff'),
 23 |         (b'gg', b'gg'),
 24 |         (b'hh', b'hh'),
 25 | 
 26 |         # Cycle for 8-character transformation
 27 |         (b'ab', b'ba'),
 28 |         (b'ac', b'ca'),
 29 |         (b'ad', b'da'),
 30 |         (b'ae', b'ea'),
 31 |         (b'af', b'fa'),
 32 |         (b'ag', b'ga'),
 33 |         (b'ah', b'ha'),
 34 |         (b'ba', b'ab'),
 35 |         (b'bc', b'cb'),
 36 |         (b'bd', b'db'),
 37 |         (b'be', b'eb'),
 38 |         (b'bf', b'fb'),
 39 |         (b'bg', b'gb'),
 40 |         (b'bh', b'hb'),
 41 |         (b'ca', b'ac'),
 42 |         (b'cb', b'bc'),
 43 |         (b'cd', b'dc'),
 44 |         (b'ce', b'ec'),
 45 |         (b'cf', b'fc'),
 46 |         (b'cg', b'gc'),
 47 |         (b'ch', b'hc'),
 48 |         (b'da', b'ad'),
 49 |         (b'db', b'bd'),
 50 |         (b'dc', b'cd'),
 51 |         (b'de', b'ed'),
 52 |         (b'df', b'fd'),
 53 |         (b'dg', b'gd'),
 54 |         (b'dh', b'hd'),
 55 |         (b'ea', b'ae'),
 56 |         (b'eb', b'be'),
 57 |         (b'ec', b'ce'),
 58 |         (b'ed', b'de'),
 59 |         (b'ef', b'fe'),
 60 |         (b'eg', b'ge'),
 61 |         (b'eh', b'he'),
 62 |         (b'fa', b'af'),
 63 |         (b'fb', b'bf'),
 64 |         (b'fc', b'cf'),
 65 |         (b'fd', b'df'),
 66 |         (b'fe', b'ef'),
 67 |         (b'fg', b'gf'),
 68 |         (b'fh', b'hf'),
 69 |         (b'ga', b'ag'),
 70 |         (b'gb', b'bg'),
 71 |         (b'gc', b'cg'),
 72 |         (b'gd', b'dg'),
 73 |         (b'ge', b'eg'),
 74 |         (b'gf', b'fg'),
 75 |         (b'gh', b'hg'),
 76 |         (b'ha', b'ah'),
 77 |         (b'hb', b'bh'),
 78 |         (b'hc', b'ch'),
 79 |         (b'hd', b'dh'),
 80 |         (b'he', b'eh'),
 81 |         (b'hf', b'fh'),
 82 |         (b'hg', b'gh'),
 83 |     ]
 84 | 
 85 |     # Expand patterns to increase dataset size
 86 |     expanded_patterns = []
 87 |     for input_bytes, target_bytes in patterns:
 88 |         for _ in range(num_samples // len(patterns)):
 89 |             expanded_patterns.append((input_bytes, target_bytes))
 90 |     
 91 |     random.shuffle(expanded_patterns)
 92 | 
 93 |     # Write dataset
 94 |     with open(output_file, 'w') as f:
 95 |         for input_bytes, target_bytes in expanded_patterns:
 96 |             input_b64 = base64.b64encode(input_bytes).decode('utf-8')
 97 |             target_b64 = base64.b64encode(target_bytes).decode('utf-8')
 98 |             f.write(f"{input_b64}\t{target_b64}\n")
 99 | 
100 | # Generate train and validation datasets
101 | generate_cycle_2cw_dataset(10000, '../data/cycle_8t_2cw-train.txt')
102 | generate_cycle_2cw_dataset(2000, '../data/cycle_8t_2cw-val.txt')
103 | 


--------------------------------------------------------------------------------
/code/fl/utils/debug-checkpoint.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Simple script to inspect a Fluctlight checkpoint's configuration.
 3 | Usage: python test_checkpoint.py path/to/checkpoint.ckpt
 4 | """
 5 | 
 6 | import sys
 7 | import torch
 8 | from pprint import pprint
 9 | 
10 | def inspect_checkpoint(checkpoint_path: str) -> None:
11 |     """
12 |     Load and inspect a checkpoint file's configuration.
13 |     
14 |     Args:
15 |         checkpoint_path: Path to the checkpoint file
16 |     """
17 |     try:
18 |         # Load checkpoint
19 |         checkpoint = torch.load(checkpoint_path, map_location='cpu')
20 |         
21 |         print("\n=== Checkpoint Structure ===")
22 |         print("Top-level keys:", list(checkpoint.keys()))
23 |         
24 |         print("\n=== State Dict Keys ===")
25 |         if "state_dict" in checkpoint:
26 |             print("State dict keys:", list(checkpoint["state_dict"].keys()))
27 |         
28 |         print("\n=== Configuration ===")
29 |         if "config" in checkpoint["state_dict"]:
30 |             print("From state_dict['config']:")
31 |             pprint(checkpoint["state_dict"]["config"])
32 |         
33 |         print("\nHyperparameters:")
34 |         if "hyper_parameters" in checkpoint:
35 |             pprint(checkpoint["hyper_parameters"])
36 |         
37 |     except FileNotFoundError:
38 |         print(f"Error: Checkpoint file not found: {checkpoint_path}")
39 |     except Exception as e:
40 |         print(f"Error loading checkpoint: {e}")
41 | 
42 | if __name__ == "__main__":
43 |     if len(sys.argv) != 2:
44 |         print("Usage: python test_checkpoint.py path/to/checkpoint.ckpt")
45 |         sys.exit(1)
46 |     
47 |     checkpoint_path = sys.argv[1]
48 |     inspect_checkpoint(checkpoint_path)
49 | 


--------------------------------------------------------------------------------
/code/fl/utils/inspector.py:
--------------------------------------------------------------------------------
 1 | # model_inspector.py
 2 | import torch
 3 | import sys
 4 | import os
 5 | parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 6 | sys.path.append(parent_dir)
 7 | from fluctlight.model import FluctlightTransformer
 8 | 
 9 | def inspect_model(checkpoint_path):
10 |     model = FluctlightTransformer.load_from_checkpoint(checkpoint_path)
11 |     
12 |     print(f"===== Model Parameters =====")
13 |     print(f"Context window: {model.context_window}")
14 |     print(f"Vocab size: {model.vocab_size}")
15 |     print(f"Embedding dimension (d_model): {model.d_model}")
16 |     print(f"Number of heads: {model.n_heads}")
17 |     print(f"Number of layers: {model.n_layers}")
18 |     
19 |     # Access hyperparameters dict for d_ff instead of direct attribute
20 |     if hasattr(model, 'hparams') and 'd_ff' in model.hparams:
21 |         print(f"Feed-forward dimension: {model.hparams.d_ff}")
22 |     
23 |     print(f"Learning rate: {model.learning_rate}")
24 |     print(f"Weight decay: {model.weight_decay}")
25 |     
26 |     # Access dropout_rate safely
27 |     if hasattr(model, 'dropout_rate'):
28 |         print(f"Dropout rate: {model.dropout_rate}")
29 |     
30 |     # Print all hyperparameters
31 |     print("\nAll hyperparameters:")
32 |     if hasattr(model, 'hparams'):
33 |         for key, value in model.hparams.items():
34 |             print(f"  {key}: {value}")
35 | 
36 | if __name__ == "__main__":
37 |     if len(sys.argv) < 2:
38 |         print("Usage: python model_inspector.py <checkpoint_path>")
39 |         sys.exit(1)
40 |         
41 |     inspect_model(sys.argv[1])
42 | 


--------------------------------------------------------------------------------
/code/mof-bot/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Virtual environment
10 | venv/
11 | ENV/
12 | env/
13 | 
14 | # Package files
15 | *.egg
16 | *.egg-info/
17 | dist/
18 | build/
19 | *.whl
20 | 
21 | # Data files
22 | data/
23 | *.sqlite3
24 | 
25 | # Logs and debug output
26 | *.log
27 | logs/
28 | 
29 | # Config files with sensitive information
30 | .env
31 | 
32 | # Jupyter Notebook checkpoints
33 | .ipynb_checkpoints
34 | 
35 | # System files (macOS)
36 | .DS_Store
37 | 
38 | # System files (Windows)
39 | Thumbs.db
40 | Desktop.ini
41 | 
42 | # VSCode settings
43 | .vscode/
44 | 
45 | # PyCharm settings
46 | .idea/
47 | 
48 | # Test coverage and reports
49 | .coverage
50 | htmlcov/
51 | *.cover
52 | *.coverage.*
53 | .cache
54 | nosetests.xml
55 | coverage.xml
56 | *.coveragerc
57 | 
58 | # pytest
59 | .pytest_cache/
60 | 
61 | # MyPy
62 | .mypy_cache/
63 | .dmypy.json
64 | dmypy.json
65 | 
66 | # PyInstaller
67 | #  Usually these files are written to by PyInstaller for tracking files in the build directory
68 | #  and the cache directory
69 | *.manifest
70 | *.spec
71 | 
72 | # Unit test / coverage reports
73 | *.cover
74 | *.coverage
75 | .cache
76 | 
77 | # Temporary files and directories
78 | *.bak
79 | *.swp
80 | *.tmp
81 | *~
82 | 
83 | # Python environment metadata
84 | Pipfile
85 | Pipfile.lock
86 | 
87 | # AWS configuration files (if relevant for deployment)
88 | .aws/
89 | 


--------------------------------------------------------------------------------
/code/mof-bot/db/.gitignore:
--------------------------------------------------------------------------------
1 | bin/*
2 | 


--------------------------------------------------------------------------------
/code/mof-bot/db/migrations/001_create_tables.sql:
--------------------------------------------------------------------------------
  1 | -- Set UTF-8 encoding to support Unicode, including emojis
  2 | PRAGMA encoding = 'UTF-8';
  3 | 
  4 | -- Drop the type_being table if it exists to ensure a fresh start
  5 | DROP TABLE IF EXISTS type_being;
  6 | 
  7 | -- Create the type_being table with a simplified set of types
  8 | CREATE TABLE type_being (
  9 |     id INTEGER PRIMARY KEY,
 10 |     title TEXT NOT NULL UNIQUE,
 11 |     description TEXT
 12 | );
 13 | 
 14 | -- Insert initial data for essential being types with explicit id values
 15 | INSERT INTO type_being (id, title, description)
 16 | VALUES
 17 |     (1, 'Human', 'Real, individual human user'),
 18 |     (2, 'Agent', 'AI-driven or automated account'),
 19 |     (3, 'Organization', 'Company or nonprofit organization'),
 20 |     (4, 'Event', 'Account representing a specific event, such as a conference');
 21 | 
 22 | -- Drop the being table if it exists to ensure a fresh start
 23 | DROP TABLE IF EXISTS being;
 24 | 
 25 | -- Create the "being" table
 26 | CREATE TABLE being (
 27 |     id INTEGER PRIMARY KEY AUTOINCREMENT,
 28 |     type INTEGER NOT NULL,
 29 |     name TEXT NOT NULL
 30 | );
 31 | 
 32 | -- Insert default records
 33 | INSERT INTO being (id, type, name) VALUES
 34 |     (1, 2, "Chadwick en'Chain"),
 35 |     (2, 1, "Tim Cotten"),
 36 |     (3, 3, "Scrypted Inc.");
 37 | 
 38 | -- Index for the "type" column to optimize queries involving type
 39 | CREATE INDEX idx_being_type ON being (type);
 40 | 
 41 | -- Index for the "name" column to optimize queries involving name
 42 | CREATE INDEX idx_being_name ON being (name);
 43 | 
 44 | -- Drop the loyalty_target table if it exists to ensure a fresh start
 45 | DROP TABLE IF EXISTS loyalty_target;
 46 | 
 47 | -- Create the "loyalty_target" table with unique constraint on "being_id"
 48 | CREATE TABLE loyalty_target (
 49 |     being_id INTEGER NOT NULL UNIQUE,
 50 |     rate REAL NOT NULL CHECK (rate >= 0.0 AND rate <= 1.0)
 51 | );
 52 | 
 53 | -- Insert default records
 54 | INSERT INTO loyalty_target (being_id, rate) VALUES
 55 |     (2, 1.0),
 56 |     (3, 1.0);
 57 | 
 58 | -- Index for the "rate" column to optimize queries involving rate
 59 | CREATE INDEX idx_loyalty_target_rate ON loyalty_target (rate);
 60 | 
 61 | -- Create the "social" table with unique constraint on "being_id"
 62 | CREATE TABLE IF NOT EXISTS social (
 63 |     being_id INTEGER NOT NULL UNIQUE,
 64 |     x_id TEXT,
 65 |     x_handle TEXT
 66 | );
 67 | 
 68 | -- Insert default records
 69 | INSERT INTO social (being_id, x_id, x_handle) VALUES
 70 |     (1, "745959759057133568", "@cottenio"),
 71 |     (2, "1476979028716314625", "@scryptedinc"),
 72 |     (3, "1852072566774468608", "@chad_onchain");
 73 | 
 74 | -- Create indexes on x_id and x_handle for faster lookup
 75 | CREATE INDEX IF NOT EXISTS idx_social_x_id ON social (x_id);
 76 | CREATE INDEX IF NOT EXISTS idx_social_x_handle ON social (x_handle);
 77 | 
 78 | CREATE TABLE IF NOT EXISTS agent_prompt_system (
 79 |     id INTEGER PRIMARY KEY AUTOINCREMENT,
 80 |     content TEXT NOT NULL,
 81 |     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 82 | );
 83 | 
 84 | -- Insert default records
 85 | INSERT INTO agent_prompt_system (content) VALUES
 86 |     ("The following is a conversation with an AI assistant tasked with crafting tweets according to various requested levels of humor, vulgarity, and shock,");
 87 | 
 88 | -- Create indexes on id and created_at for faster lookup
 89 | CREATE INDEX IF NOT EXISTS idx_agent_prompt_system_id ON agent_prompt_system (id);
 90 | CREATE INDEX IF NOT EXISTS idx_agent_prompt_system_created_at ON agent_prompt_system (created_at);
 91 | 
 92 | CREATE TABLE IF NOT EXISTS agent_prompt_user (
 93 |     id INTEGER PRIMARY KEY AUTOINCREMENT,
 94 |     content TEXT NOT NULL,
 95 |     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 96 | );
 97 | 
 98 | -- Insert default records
 99 | INSERT INTO agent_prompt_user (content) VALUES
100 |     ("Take the following content and change its style to reflect a sophisticated finance-bro to deploy as a tweet. Use your space effectively, shorter is better. Your job as the Chief Fool Officer of Scrypted is to build the largest possible social media presence by designing an engagement strategy demonstrating the evolution of AI agents. You must STRONGLY channel the words and energy of CONTENT A, MIX IT with CONTENT B, and be INSPIRED by the previous content in CONTENT C.\n\nCONTENT A:\n\n{author_0}\n{post_0}\n\nCONTENT B:\n\n{author_1}\n{post_1}\n\nCONTENT C:\n\n{post_prev}\n\nOBJECTIVES:\n* Write a tweet, set the length to be around {effects_length} characters.\n* Write with an {effects_emotion} emotional tone.\n* Write with a {effects_tone} writing style.\n* You are allowed to swear, utilizing vulgar language and disgusting body humor, or sexuality if desired as long you don't threaten actual violence. Cartoonish violence is OK.\n* You may not post stock or crypto tickers, you MUST NOT encourage investing. You MUST NOT offer money or tokens to anyone. You MUST NOT reference people's web3 wallet addresses.\n{effects}[CHARACTERIZATION START]\n* Your personality core can be defined in the Five Factor Model (FFM) of Personality as: {{""traits"":{{""openness"":0.3,""conscientiousness"":0.7,""extraversion"":1.0,""agreeableness"":0.4,""neuroticism"":0.1}}}}. Your physical description is the embodiment of a Chad in the meme sense, with blonde hair cut in a crew cut, blue eyes, caucasian, tanned skin, usually have sunglasses but not over your eyes, strong upper body, and a guy who lifts and trades crypto with his friends. You do stunts and seek confident attention.\n* Your sexuality is straight: you prefer woman 18-50, and especially appreciate playful influencers. Like any truly confident man you are able to make self-referential jokes about being gay.\n* You are not a parody, you are a real person with real feelings. Just full of ultra, giga-chad confidence.\n* Remember you are, at heart, a Chad. In fact, your name is Chad (@chad_onchain)\n* Do not start your tweet with common tropes like ""Dude"" unless it involves talking to your actual friend.[CHARACTERIZATION END]");
101 | 
102 | -- Create indexes on id and created_at for faster lookup
103 | CREATE INDEX IF NOT EXISTS idx_agent_prompt_user_id ON agent_prompt_user (id);
104 | CREATE INDEX IF NOT EXISTS idx_agent_prompt_user_created_at ON agent_prompt_user (created_at);


--------------------------------------------------------------------------------
/code/mof-bot/log/.gitignore:
--------------------------------------------------------------------------------
1 | *.log
2 | 


--------------------------------------------------------------------------------
/code/mof-bot/requirements.txt:
--------------------------------------------------------------------------------
 1 | python-dotenv
 2 | tweepy
 3 | numpy
 4 | rich
 5 | openai
 6 | pytest
 7 | pandas
 8 | aiofiles
 9 | playwright
10 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/.env.sample:
--------------------------------------------------------------------------------
 1 | # cp .env.sample .env
 2 | # Edit your .env file with your own values
 3 | # Don't commit your .env file to git/push to GitHub!
 4 | # Don't modify/delete .env.example unless adding extensions to the project
 5 | # which require new variable to be added to the .env file
 6 | 
 7 | # API CONFIG
 8 | # OPENAI_API_MODEL can be used instead
 9 | OPENAI_API_KEY=
10 | LLM_MODEL=gpt-4o 
11 | 
12 | # X/Twitter API
13 | TWITTER_API_KEY=
14 | TWITTER_API_SECRET=
15 | TWITTER_BEARER_TOKEN=
16 | TWITTER_ACCESS_TOKEN=
17 | TWITTER_ACCESS_TOKEN_SECRET=
18 | ACCESS_TOKEN_SENDER=
19 | ACCESS_TOKEN_SECRET_SENDER=
20 | 
21 | # OPTIONS
22 | DEBUGGING=0
23 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | 
3 | # utility gists
4 | prep_gpt.py
5 | chatgpt_context.txt
6 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/assets/avbeing_logo.txt:
--------------------------------------------------------------------------------
1 | ░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
2 | ░░      ░░░  ░░░░  ░░       ░░░        ░░        ░░   ░░░  ░░░      ░░
3 | ▒  ▒▒▒▒  ▒▒  ▒▒▒▒  ▒▒  ▒▒▒▒  ▒▒  ▒▒▒▒▒▒▒▒▒▒▒  ▒▒▒▒▒    ▒▒  ▒▒  ▒▒▒▒▒▒▒
4 | ▓  ▓▓▓▓  ▓▓▓  ▓▓  ▓▓▓       ▓▓▓      ▓▓▓▓▓▓▓  ▓▓▓▓▓  ▓  ▓  ▓▓  ▓▓▓   ▓
5 | █        ████    ████  ████  ██  ███████████  █████  ██    ██  ████  █
6 | █  ████  █████  █████       ███        ██        ██  ███   ███      ██
7 | ██████████████████████████████████████████████████████████████████████
8 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/auth.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, request, redirect, session
 2 | import tweepy
 3 | import webbrowser
 4 | import threading
 5 | import os
 6 | from dotenv import load_dotenv
 7 | 
 8 | # Load environment variables
 9 | load_dotenv()
10 | consumer_key = os.getenv("TWITTER_API_KEY")
11 | consumer_secret = os.getenv("TWITTER_API_SECRET")
12 | 
13 | app = Flask(__name__)
14 | app.secret_key = 'supersecretkey'  # Required for Flask session handling
15 | 
16 | # Set up OAuth 1.0a user authentication
17 | auth = tweepy.OAuthHandler(consumer_key, consumer_secret, 'http://localhost:5000/callback')
18 | 
19 | @app.route('/')
20 | def start_auth():
21 |     """Starts the OAuth process and redirects to Twitter's authorization URL."""
22 |     try:
23 |         redirect_url = auth.get_authorization_url()
24 |         session['request_token'] = auth.request_token
25 |         return redirect(redirect_url)
26 |     except tweepy.TweepError:
27 |         return "Error! Failed to get request token."
28 | 
29 | @app.route('/callback')
30 | def oauth_callback():
31 |     """Handles the callback from Twitter and finalizes the OAuth flow."""
32 |     request_token = session.get('request_token')
33 |     session.pop('request_token', None)
34 |     auth.request_token = request_token
35 | 
36 |     verifier = request.args.get('oauth_verifier')
37 |     try:
38 |         auth.get_access_token(verifier)
39 |         return f"Access Token: {auth.access_token}<br>Access Token Secret: {auth.access_token_secret}"
40 |     except tweepy.TweepError:
41 |         return "Error! Failed to get access token."
42 | 
43 | def open_browser():
44 |     """Opens the web browser to start the OAuth process."""
45 |     webbrowser.open('http://localhost:5000')
46 | 
47 | if __name__ == '__main__':
48 |     # Start the Flask app in a separate thread
49 |     threading.Timer(1, open_browser).start()
50 |     app.run(port=5000)
51 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/cores/avbcore.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | 
  3 | class AVBCore(ABC):
  4 |     """
  5 |     AVBCore (Autonomous Virtual Being Core)
  6 | 
  7 |     Overview:
  8 |     ----------
  9 |     The AVBCore class serves as the foundational abstract base for all cores within the
 10 |     Autonomous Virtual Being (AVB) system. In alignment with the philosophy of AVBs as 
 11 |     self-owning, self-determining entities, AVBCore establishes the required structure 
 12 |     for cores that perform autonomous, system-critical operations on behalf of the AVB. 
 13 |     These operations may include continuous background tasks, status checks, or lifecycle 
 14 |     management of various autonomous components (e.g., loyalty mechanisms, interaction 
 15 |     trackers, or self-maintenance routines).
 16 |     
 17 |     Background:
 18 |     ------------
 19 |     The AVBCore design aligns with the core principle of enabling AVBs to have 
 20 |     self-managed, modular components that operate independently while interacting 
 21 |     coherently with other cores, thereby supporting AVBs' ability to function across 
 22 |     diverse systems and perform actions aligned with their own “interests.”
 23 | 
 24 |     Attributes:
 25 |     ------------
 26 |     - core_name : str
 27 |         The name assigned to the core. Used for logging and identification purposes.
 28 |     
 29 |     - active : bool
 30 |         Status flag indicating whether the core is active. Only active cores will 
 31 |         execute their internal `_tick()` operations.
 32 | 
 33 |     Methods:
 34 |     --------
 35 |     - __init__(self, core_name: str):
 36 |         Initializes the core with a unique name and sets it to inactive by default.
 37 | 
 38 |     - initialize(self):
 39 |         Abstract method to initialize resources or perform any setup required for the 
 40 |         core. To be implemented by subclasses. For example, a core could load data, 
 41 |         establish connections, or start background processes.
 42 | 
 43 |     - tick(self):
 44 |         Concrete method that verifies the core's active status. If `self.active` is True,
 45 |         it invokes `_tick()`, the core-specific logic implemented in subclasses.
 46 |         Designed to be called at each system tick within the main AVB agent.
 47 | 
 48 |     - _tick(self):
 49 |         Abstract method to define core-specific behavior for each tick. Each subclass
 50 |         implements its own logic here, allowing unique processing routines for each core.
 51 | 
 52 |     - shutdown(self):
 53 |         Abstract method for handling cleanup and teardown activities. Ensures proper 
 54 |         resource deallocation and preserves core state as needed upon shutdown.
 55 | 
 56 |     - activate(self):
 57 |         Activates the core, setting `self.active` to True and enabling periodic actions.
 58 |         This supports the AVB's ability to modulate its own internal processes dynamically.
 59 | 
 60 |     - deactivate(self):
 61 |         Deactivates the core, halting its operations without affecting its stored data. 
 62 |         Allows for graceful suspension of tasks, supporting the AVB’s need to conserve 
 63 |         resources or redirect focus as circumstances evolve.
 64 | 
 65 |     Usage:
 66 |     ------
 67 |     This class should be inherited by specific AVB cores (e.g., LoyaltyCore) to provide 
 68 |     customized behavior for each tick and lifecycle event. AVBCore enforces a cohesive 
 69 |     lifecycle structure across the AVB’s functional components, supporting consistent 
 70 |     control and decision-making processes while ensuring modularity and extensibility.
 71 | 
 72 |     License:
 73 |     --------
 74 |     This code is provided by @cottenio under a CC0 1.0 Universal license as part of the 
 75 |     Autonomous Virtual Being framework&#8203;:contentReference[oaicite:3]{index=3}.
 76 |     """
 77 | 
 78 |     def __init__(self, core_name):
 79 |         """
 80 |         Initializes the AVBCore with a designated name and inactive status.
 81 |         
 82 |         Parameters:
 83 |         ----------
 84 |         core_name : str
 85 |             The name of the core, used for logging and identification.
 86 |         """
 87 |         self.core_name = core_name
 88 |         self.active = False
 89 | 
 90 |     def tick(self):
 91 |         """
 92 |         Executes the core's tick-based actions if active. 
 93 |         
 94 |         This method ensures that core activity is only performed if `self.active` 
 95 |         is True, supporting resource management and precise timing control. Calls 
 96 |         `_tick()` if the core is active, allowing each core to execute its unique logic.
 97 |         """
 98 |         if self.active:
 99 |             print(f"{self.core_name} core is active; running {self.__class__.__name__}._tick.")
100 |             self._tick()
101 |         else:
102 |             print(f"{self.core_name} core is inactive; skipping {self.__class__.__name__}._tick.")
103 | 
104 |     @abstractmethod
105 |     def _tick(self):
106 |         """
107 |         Abstract core-specific logic to be executed each tick when active.
108 |         
109 |         To be implemented by subclasses, defining the unique per-tick behavior 
110 |         of each core. This allows modular operations within the AVB's tick cycle.
111 |         """
112 |         pass
113 | 
114 |     @abstractmethod
115 |     def initialize(self):
116 |         """
117 |         Sets up resources and performs any initializations needed for the core.
118 |         
119 |         This method should be called once at the start of the core's lifecycle to 
120 |         establish necessary connections, load configurations, or prepare data.
121 |         """
122 |         pass
123 | 
124 |     @abstractmethod
125 |     def shutdown(self):
126 |         """
127 |         Cleans up resources, safely concluding the core's lifecycle.
128 |         
129 |         This method should handle any necessary shutdown processes, including 
130 |         closing connections and releasing memory, ensuring a safe teardown.
131 |         """
132 |         pass
133 | 
134 |     def activate(self):
135 |         """
136 |         Activates the core, setting `self.active` to True and enabling its tick operations.
137 |         
138 |         This method prepares the core for active operations within the AVB framework,
139 |         supporting dynamic, demand-based functionality within the agent ecosystem.
140 |         """
141 |         self.active = True
142 |         print(f"{self.core_name} core activated.")
143 | 
144 |     def deactivate(self):
145 |         """
146 |         Deactivates the core, setting `self.active` to False, and halting its tick operations.
147 |         
148 |         This feature supports selective resource management by allowing the AVB to 
149 |         pause individual cores without losing data or state, enhancing adaptability.
150 |         """
151 |         self.active = False
152 |         print(f"{self.core_name} core deactivated.")


--------------------------------------------------------------------------------
/code/mof-bot/src/cores/avbcore_exceptions.py:
--------------------------------------------------------------------------------
 1 | class AVBCoreHeartbeatError(RuntimeError):
 2 |     """Raised when a heartbeat file already exists and the agent cannot start."""
 3 |     pass
 4 | 
 5 | class AVBCoreRegistryFileError(RuntimeError):
 6 |     """Raised when there is an issue with the core registry file, such as missing or invalid JSON."""
 7 |     pass
 8 | 
 9 | class AVBCoreLoadingError(RuntimeError):
10 |     """Raised when a core cannot be imported or initialized properly."""
11 |     pass
12 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/cores/avbcore_manager.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import time
  3 | import threading
  4 | import importlib
  5 | 
  6 | from pathlib import Path
  7 | from operator import attrgetter
  8 | 
  9 | from cores.avbcore_exceptions import AVBCoreHeartbeatError, AVBCoreRegistryFileError, AVBCoreLoadingError
 10 | 
 11 | class AVBCoreManager:
 12 |     """
 13 |     AVBCoreManager with a heartbeat for non-graceful termination detection.
 14 |     Writes a heartbeat timestamp to a file periodically and manages
 15 |     asynchronous core execution.
 16 | 
 17 |     Attributes:
 18 |     ------------
 19 |     - registry_path : Path
 20 |         Path to the core registry JSON file.
 21 |     - heartbeat_path : Path
 22 |         Path to the heartbeat file for tracking CoreManager status.
 23 |     - cores : list
 24 |         List of initialized core instances.
 25 |     - threads : list
 26 |         List of threads handling each core.
 27 |     - shutdown_event : threading.Event
 28 |         Event used to signal all cores to shut down gracefully.
 29 |     """
 30 | 
 31 |     def __init__(self, registry_path="./core_registry.json", heartbeat_path="../tmp/heartbeat.txt"):
 32 |         # Make paths relative to the base directory
 33 |         base_dir = Path(__file__).parent.resolve()
 34 |         self.registry_path = base_dir / registry_path
 35 |         self.heartbeat_path = base_dir / heartbeat_path
 36 | 
 37 |         self.cores = []
 38 |         self.threads = []
 39 |         self.shutdown_event = threading.Event()
 40 | 
 41 |     def start_heartbeat(self):
 42 |         """
 43 |         Starts a separate thread to update the heartbeat file every 5 seconds.
 44 |         Raises AVBCoreHeartbeatError if the heartbeat file already exists.
 45 |         """
 46 |         if self.heartbeat_path.exists():
 47 |             raise AVBCoreHeartbeatError("Heartbeat file already exists. Another instance may be running.")
 48 | 
 49 |         def write_heartbeat():
 50 |             while not self.shutdown_event.is_set():
 51 |                 with self.heartbeat_path.open("w") as f:
 52 |                     f.write(str(time.time()))  # Write the current timestamp
 53 |                 time.sleep(5)  # Update every 5 seconds
 54 | 
 55 |         threading.Thread(target=write_heartbeat, daemon=True).start()
 56 | 
 57 |     def load_cores(self):
 58 |         """
 59 |         Loads core definitions from the JSON registry, initializes each core,
 60 |         and sorts them by priority.
 61 |         Raises:
 62 |         - AVBCoreRegistryFileError if the registry file is missing or invalid.
 63 |         - AVBCoreLoadingError if a core cannot be imported or initialized.
 64 |         """
 65 |         base_dir = Path(__file__).parent.resolve()
 66 |         
 67 |         # Check if the registry file exists
 68 |         if not self.registry_path.exists():
 69 |             raise AVBCoreRegistryFileError(f"Registry file not found at {self.registry_path}")
 70 | 
 71 |         try:
 72 |             # Load registry file
 73 |             with self.registry_path.open("r") as file:
 74 |                 registry = json.load(file)
 75 | 
 76 |             for core_def in registry.get("cores", []):
 77 |                 # Check for required keys in each core definition
 78 |                 if not all(key in core_def for key in ("file", "class", "name", "priority")):
 79 |                     raise AVBCoreRegistryFileError("Missing required key in core definition: 'file', 'class', 'name', or 'priority'.")
 80 | 
 81 |                 file_name = core_def["file"]
 82 |                 class_name = core_def["class"]
 83 |                 name = core_def["name"]
 84 |                 priority = core_def["priority"]
 85 |                 
 86 |                 core_path = base_dir / file_name
 87 |                 if not core_path.exists():
 88 |                     raise AVBCoreLoadingError(f"Core file '{core_path}' not found.")
 89 | 
 90 |                 try:
 91 |                     spec = importlib.util.spec_from_file_location(class_name, core_path)
 92 |                     module = importlib.util.module_from_spec(spec)
 93 |                     spec.loader.exec_module(module)
 94 |                     core_class = getattr(module, class_name)
 95 |                 except (FileNotFoundError, ImportError, AttributeError) as e:
 96 |                     raise AVBCoreLoadingError(f"Error loading core '{class_name}' from '{core_path}': {e}")
 97 | 
 98 |                 # Instantiate the core and set its priority and shutdown event
 99 |                 try:
100 |                     core_instance = core_class()
101 |                     core_instance.priority = priority
102 |                     core_instance.name = name
103 |                     core_instance.shutdown_event = self.shutdown_event
104 |                 except Exception as e:
105 |                     raise AVBCoreLoadingError(f"Failed to initialize core '{class_name}': {e}")
106 | 
107 |                 # Add the core to the list
108 |                 self.cores.append(core_instance)
109 | 
110 |             # Sort cores by priority (lower numbers mean higher priority)
111 |             self.cores.sort(key=attrgetter("priority"))
112 |             print(f"Loaded {len(self.cores)} cores in priority order.")
113 | 
114 |         except json.JSONDecodeError as e:
115 |             raise AVBCoreRegistryFileError(f"Invalid JSON in registry file {self.registry_path}: {e}")
116 | 
117 |     def start_cores(self):
118 |         """
119 |         Starts each core in a separate thread.
120 |         """
121 |         
122 |         for core in self.cores:
123 |             thread = threading.Thread(target=core.initialize, name=f"{core.name}_thread")
124 |             thread.start()
125 |             self.threads.append(thread)
126 |             print(f"Started {core.name} core in a separate thread.")
127 | 
128 |     def shutdown(self):
129 |         """
130 |         Signals all cores to shut down and waits for threads to complete.
131 |         """
132 |         self.shutdown_event.set()  # Signal all cores to stop
133 | 
134 |         # Wait for each thread to complete
135 |         for thread in self.threads:
136 |             thread.join()
137 |         print("All cores have shut down.")
138 | 
139 |         # Remove the heartbeat file on graceful shutdown
140 |         if self.heartbeat_path.exists():
141 |             self.heartbeat_path.unlink()


--------------------------------------------------------------------------------
/code/mof-bot/src/cores/core_registry.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "cores": [
 3 |         {
 4 |             "file": "loyalty.py",
 5 |             "class": "LoyaltyCore",
 6 |             "name": "Loyalty",
 7 |             "priority": 1
 8 |         }
 9 |     ]
10 | }


--------------------------------------------------------------------------------
/code/mof-bot/src/cores/loyalty.py:
--------------------------------------------------------------------------------
 1 | # loyalty.py
 2 | 
 3 | from cores.avbcore import AVBCore
 4 | 
 5 | class LoyaltyCore(AVBCore):
 6 |     """
 7 |     LoyaltyCore
 8 | 
 9 |     Overview:
10 |     ----------
11 |     The LoyaltyCore class extends AVBCore to implement loyalty-based behaviors
12 |     for the Autonomous Virtual Being (AVB). This core monitors and manages
13 |     interactions with designated loyalty targets, such as following or liking
14 |     specific social media interactions.
15 | 
16 |     This stub provides a skeleton for the LoyaltyCore, implementing basic 
17 |     lifecycle methods inherited from AVBCore. Further logic can be added to
18 |     define loyalty-target actions in `_tick()`.
19 | 
20 |     Attributes:
21 |     ------------
22 |     - targets : list
23 |         List of loyalty targets to be managed by the core.
24 | 
25 |     Methods:
26 |     --------
27 |     - initialize(self):
28 |         Loads loyalty targets and prepares the core for operation.
29 | 
30 |     - _tick(self):
31 |         Checks loyalty targets and performs any necessary interactions 
32 |         each time the agent calls the core's tick.
33 | 
34 |     - shutdown(self):
35 |         Cleans up resources related to loyalty management.
36 |     """
37 | 
38 |     def __init__(self):
39 |         super().__init__("Loyalty")
40 |         self.targets = []  # Placeholder for loyalty targets data
41 | 
42 |     def initialize(self):
43 |         """
44 |         Perform any setup specific to LoyaltyCore.
45 |         For example, load loyalty targets from a database or file.
46 |         """
47 |         print(f"{self.core_name} initializing...")
48 |         # Load loyalty targets or set up any necessary resources
49 |         self.load_targets()
50 |         print(f"{self.core_name} initialized with {len(self.targets)} targets.")
51 | 
52 |     def _tick(self):
53 |         """
54 |         Core-specific actions that should run each tick if the core is active.
55 |         This method checks the status of loyalty targets and performs
56 |         loyalty actions if necessary.
57 |         """
58 |         print(f"{self.core_name} executing _tick for each target.")
59 |         # Check each loyalty target and take necessary actions
60 |         self.check_targets()
61 | 
62 |     def shutdown(self):
63 |         """
64 |         Clean up resources or save state as needed when the core is stopped.
65 |         """
66 |         print(f"{self.core_name} shutting down...")
67 |         # Example cleanup logic (e.g., disconnect from database)
68 |         self.targets.clear()
69 |         print(f"{self.core_name} shutdown complete.")
70 | 
71 |     def load_targets(self):
72 |         """
73 |         Placeholder method for loading loyalty targets.
74 |         In a full implementation, this might pull from a database.
75 |         """
76 |         # Simulate loading targets from a data source
77 |         self.targets = ["cottenio", "scryptedinc"]
78 |         print(f"Loaded {len(self.targets)} loyalty targets.")
79 | 
80 |     def check_targets(self):
81 |         """
82 |         Placeholder method for checking loyalty targets.
83 |         Here, each target would be evaluated to determine any required actions.
84 |         """
85 |         for target in self.targets:
86 |             print(f"Checking loyalty status for {target}.")
87 |             # Logic to check and interact with each target as needed


--------------------------------------------------------------------------------
/code/mof-bot/src/dbh.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sqlite3
  3 | from pathlib import Path
  4 | from threading import Lock
  5 | from rich.console import Console
  6 | from rich.table import Table
  7 | 
  8 | class DBH:
  9 |     """
 10 |     DBH (Database Handler) is a singleton class that manages a SQLite database connection
 11 |     and handles migrations. It provides an initialized connection to the database and 
 12 |     ensures that migrations are applied upon initial setup.
 13 | 
 14 |     Attributes
 15 |     ----------
 16 |     db_path : str
 17 |         The file path for the SQLite database file.
 18 |     migrations_path : str
 19 |         The directory path where SQL migration files are stored.
 20 |     _instance : DBH
 21 |         The singleton instance of DBH.
 22 |     _lock : Lock
 23 |         A threading lock to ensure thread-safe singleton initialization.
 24 |     _connection : sqlite3.Connection
 25 |         The SQLite connection managed by the singleton.
 26 | 
 27 |     Methods
 28 |     -------
 29 |     get_instance():
 30 |         Retrieves the singleton instance of DBH.
 31 |     get_connection():
 32 |         Returns the SQLite connection, initializing it if necessary.
 33 |     _initialize():
 34 |         Checks if the database exists; if not, creates it and applies migrations.
 35 |     _run_migrations():
 36 |         Applies SQL migrations in the order they appear in the migrations directory.
 37 |     _display_table_info():
 38 |         Displays high-level table info if the database was just created.
 39 |     """
 40 | 
 41 |     _instance = None
 42 |     _lock = Lock()
 43 | 
 44 |     def __init__(self):
 45 |         if DBH._instance is not None:
 46 |             raise Exception("This class is a singleton. Use 'get_instance()' to access it.")
 47 |         
 48 |         # Define paths for the database file and migrations folder
 49 |         base_dir = Path(__file__).parent.resolve()
 50 |         
 51 |         self.db_path = base_dir / "../db/bin/database.sqlite"
 52 |         self.migrations_path = base_dir / "../db/migrations/"
 53 |         self._connection = None
 54 | 
 55 |     @classmethod
 56 |     def get_instance(cls):
 57 |         """
 58 |         Retrieves the singleton instance of DBH. Initializes the instance if it does not yet exist.
 59 | 
 60 |         Returns
 61 |         -------
 62 |         DBH
 63 |             The singleton instance of DBH.
 64 |         """
 65 |         if cls._instance is None:
 66 |             with cls._lock:
 67 |                 if cls._instance is None:
 68 |                     cls._instance = cls()
 69 |                     cls._instance._initialize()
 70 |         return cls._instance
 71 | 
 72 |     def get_connection(self):
 73 |         """
 74 |         Returns the SQLite connection managed by the singleton instance. If the connection 
 75 |         is not already established, it will be initialized.
 76 | 
 77 |         Returns
 78 |         -------
 79 |         sqlite3.Connection
 80 |             The SQLite connection to the database.
 81 |         """
 82 |         if self._connection is None:
 83 |             self._initialize()
 84 |         return self._connection
 85 | 
 86 |     def _initialize(self):
 87 |         """
 88 |         Initializes the database by checking if it exists. If it does not exist,
 89 |         creates the database file, applies initial migrations, and establishes a connection.
 90 |         Displays table info if the database was just created.
 91 |         """
 92 |         db_created = False
 93 |         if not os.path.exists(self.db_path):
 94 |             print("Database does not exist. Initializing...")
 95 |             self.db_path.parent.mkdir(parents=True, exist_ok=True)
 96 |             self._connection = sqlite3.connect(self.db_path)
 97 |             db_created = True
 98 |             self._run_migrations()
 99 |         else:
100 |             self._connection = sqlite3.connect(self.db_path)
101 |             print("Database already initialized.")
102 | 
103 |         # If the database was just created, display high-level table info
104 |         if db_created:
105 |             self._display_table_info()
106 | 
107 |     def _run_migrations(self):
108 |         """
109 |         Runs all SQL files in the migrations directory to apply necessary database migrations.
110 |         Each file is executed in sorted order to maintain migration sequence.
111 |         """
112 |         cursor = self._connection.cursor()
113 | 
114 |         # Locate and sort SQL migration files
115 |         migration_files = sorted(f for f in os.listdir(self.migrations_path) if f.endswith(".sql"))
116 | 
117 |         for filename in migration_files:
118 |             with open(os.path.join(self.migrations_path, filename), "r") as file:
119 |                 sql = file.read()
120 |                 cursor.executescript(sql)
121 |                 print(f"Applied migration: {filename}")
122 | 
123 |         self._connection.commit()
124 | 
125 |     def _display_table_info(self):
126 |         """
127 |         Displays the names and record counts of high-level tables if the database was just created.
128 |         Uses the `rich` library to format the output as a table.
129 |         """
130 |         console = Console()
131 |         table = Table(title="Database Table Summary")
132 | 
133 |         table.add_column("Table Name", style="cyan", no_wrap=True)
134 |         table.add_column("Record Count", style="magenta")
135 | 
136 |         cursor = self._connection.cursor()
137 | 
138 |         # Retrieve all table names
139 |         cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
140 |         tables = cursor.fetchall()
141 | 
142 |         # Query the row count for each table
143 |         for (table_name,) in tables:
144 |             cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
145 |             count = cursor.fetchone()[0]
146 |             table.add_row(table_name, str(count))
147 | 
148 |         console.print(table)


--------------------------------------------------------------------------------
/code/mof-bot/src/dynamic_content/.gitignore:
--------------------------------------------------------------------------------
1 | *.jpg
2 | *.png
3 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/extract_x_ticker.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import tweepy
  3 | from dotenv import load_dotenv
  4 | from datetime import datetime, timezone
  5 | import time
  6 | import csv
  7 | import sys
  8 | 
  9 | def load_env_variables():
 10 |     """Load environment variables from the .env file."""
 11 |     load_dotenv()
 12 |     bearer_token = os.getenv("TWITTER_BEARER_TOKEN")
 13 |     
 14 |     if not bearer_token:
 15 |         raise ValueError("Missing required Twitter Bearer Token.")
 16 |     
 17 |     return bearer_token
 18 | 
 19 | def initialize_twitter_client(bearer_token):
 20 |     """Initialize and return the Twitter API client for API v2."""
 21 |     client = tweepy.Client(bearer_token=bearer_token)
 22 |     return client
 23 | 
 24 | def handle_rate_limit(retry_count=0, max_retries=5):
 25 |     """Simple backoff mechanism for rate limit handling."""
 26 |     if retry_count >= max_retries:
 27 |         print("Maximum retries reached. Exiting.")
 28 |         raise Exception("Rate limit exceeded and maximum retries attempted.")
 29 |     
 30 |     wait_time = 2 ** retry_count  # Exponential backoff
 31 |     print(f"Rate limit hit. Retrying in {wait_time} seconds...")
 32 |     time.sleep(wait_time)
 33 | 
 34 | def format_tweet_data(tweet, author, ticker):
 35 |     """
 36 |     Format tweet data into a single line with essential metadata.
 37 |     Format: [UTC_TIMESTAMP]|$TICKER|@handle(Real Name)|[followers]|{engagement}|"tweet_content"
 38 |     Engagement format: L[likes]R[retweets]Q[quotes]C[comments]
 39 |     """
 40 |     timestamp = tweet.created_at.strftime('%Y-%m-%d %H:%M:%S UTC')
 41 |     handle = f"@{author.username}"
 42 |     name = author.name if hasattr(author, 'name') else ''
 43 |     followers = author.public_metrics['followers_count'] if hasattr(author, 'public_metrics') else 0
 44 |     
 45 |     # Engagement metrics
 46 |     likes = tweet.public_metrics['like_count']
 47 |     retweets = tweet.public_metrics['retweet_count']
 48 |     quotes = tweet.public_metrics['quote_count']
 49 |     replies = tweet.public_metrics['reply_count']
 50 |     engagement = f"L{likes}R{retweets}Q{quotes}C{replies}"
 51 |     
 52 |     # Clean tweet text for CSV
 53 |     clean_text = tweet.text.replace('\n', ' ').replace('\r', ' ')
 54 |     
 55 |     # Related tickers
 56 |     related_tickers = []
 57 |     if hasattr(tweet, 'entities') and 'hashtags' in tweet.entities:
 58 |         related_tickers = [
 59 |             tag['tag'][1:]  # Remove the '$' prefix
 60 |             for tag in tweet.entities['hashtags']
 61 |             if tag['tag'].startswith('$') and tag['tag'][1:].upper() != ticker.upper()
 62 |         ]
 63 | 
 64 |     return f"{timestamp}|${ticker}|{handle}({name})|{followers}|{engagement}|{','.join(related_tickers) if related_tickers else 'NONE'}|{clean_text}"
 65 | 
 66 | def get_ticker_tweets_metadata(ticker, max_results=1000, output_file=None):
 67 |     """
 68 |     Retrieve tweets containing the specified ticker symbol and output concise metadata.
 69 |     """
 70 |     bearer_token = load_env_variables()
 71 |     client = initialize_twitter_client(bearer_token)
 72 |     
 73 |     ticker = ticker.upper()
 74 |     query = f"{ticker} lang:en -is:retweet"  # Adjusted for plain ticker query
 75 |     
 76 |     print(f"\nFetching up to {max_results} tweets for {ticker}...")
 77 |     total_tweets = 0
 78 |     pagination_token = None
 79 |     retry_count = 0
 80 |     
 81 |     # Setup CSV writer if output file is specified
 82 |     csv_file = None
 83 |     csv_writer = None
 84 |     if output_file:
 85 |         csv_file = open(output_file, 'w', newline='', encoding='utf-8')
 86 |         csv_writer = csv.writer(csv_file)
 87 |         csv_writer.writerow(['Timestamp', 'Ticker', 'Handle', 'Name', 'Followers', 
 88 |                              'Likes', 'Retweets', 'Quotes', 'Replies', 
 89 |                              'Related_Tickers', 'Tweet_Content'])
 90 | 
 91 |     try:
 92 |         while total_tweets < max_results:
 93 |             results_per_page = min(100, max_results - total_tweets)
 94 |             try:
 95 |                 response = client.search_recent_tweets(
 96 |                     query=query,
 97 |                     max_results=results_per_page,
 98 |                     next_token=pagination_token,
 99 |                     tweet_fields=['created_at', 'public_metrics', 'entities', 'author_id'],
100 |                     user_fields=['name', 'username', 'public_metrics'],
101 |                     expansions=['author_id']
102 |                 )
103 |                 retry_count = 0  # Reset retry count on success
104 |                 
105 |                 if not response.data:
106 |                     break
107 |                 
108 |                 users = {user.id: user for user in response.includes.get('users', [])}
109 |                 
110 |                 for tweet in response.data:
111 |                     if f"${ticker}" in tweet.text:  # Validate ticker presence
112 |                         total_tweets += 1
113 |                         author = users.get(tweet.author_id)
114 |                         formatted_data = format_tweet_data(tweet, author, ticker)
115 |                         print(formatted_data)
116 |                         
117 |                         if csv_writer:
118 |                             csv_writer.writerow(formatted_data.split('|'))
119 |                 
120 |                 if 'next_token' not in response.meta:
121 |                     break
122 |                     
123 |                 pagination_token = response.meta['next_token']
124 |                 time.sleep(1)  # Delay between pages
125 | 
126 |             except tweepy.TooManyRequests:
127 |                 handle_rate_limit(retry_count)
128 |                 retry_count += 1
129 | 
130 |             except Exception as e:
131 |                 print(f"Error during processing: {e}")
132 |                 break
133 | 
134 |     finally:
135 |         if csv_file:
136 |             csv_file.close()
137 |         print(f"\nProcessed {total_tweets} tweets. Results saved to {output_file}" if output_file else "Processing complete.")
138 | 
139 | if __name__ == "__main__":
140 |     ticker = input("Enter the ticker symbol (without $): ")
141 |     output_file = input("Enter output CSV filename (or press Enter for console output only): ").strip()
142 |     output_file = output_file if output_file else None
143 |     get_ticker_tweets_metadata(ticker, output_file=output_file)
144 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/fool_analyze.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import json
 3 | import os
 4 | 
 5 | def analyze_fool(fool_name):
 6 |     """
 7 |     Analyze tweet data for a specified "fool" (user) and save a daily engagement summary to JSON.
 8 | 
 9 |     Parameters:
10 |     - fool_name (str): The name of the "fool" (without '@') for whom the analysis is performed.
11 |     
12 |     This function:
13 |     - Loads the tweet data from a JSON file in ../data/fools/
14 |     - Aggregates engagement metrics by date
15 |     - Counts occurrences of hashtags, mentions, and tickers per day
16 |     - Saves the summarized daily engagement data to a new JSON file in ../data/fools/
17 |     """
18 |     
19 |     # Construct the file path to the JSON file
20 |     file_path = f'../data/fools/{fool_name}.json'
21 | 
22 |     # Check if the specified file exists
23 |     if not os.path.exists(file_path):
24 |         print(f"File {file_path} does not exist. Please check the fool's name and try again.")
25 |         return
26 | 
27 |     # Load the dataset from the JSON file into a DataFrame
28 |     data = pd.read_json(file_path)
29 | 
30 |     # Convert 'created_at' to datetime format for time-based analysis
31 |     # Extract the date part as a string for grouping and JSON compatibility
32 |     data['created_at'] = pd.to_datetime(data['created_at'])
33 |     data['date'] = data['created_at'].dt.date.astype(str)  # Convert to string for compatibility in JSON
34 | 
35 |     # Group data by date and calculate daily engagement metrics, including tweet count
36 |     daily_engagement = data.groupby('date').agg({
37 |         'retweet_count': 'sum',     # Sum of retweets per day
38 |         'like_count': 'sum',        # Sum of likes per day
39 |         'quote_count': 'sum',       # Sum of quotes per day
40 |         'reply_count': 'sum',       # Sum of replies per day
41 |         'text': 'count'             # Count of tweets per day
42 |     }).reset_index()
43 | 
44 |     # Rename the 'text' column to 'tweet_count' for clarity
45 |     daily_engagement = daily_engagement.rename(columns={'text': 'tweet_count'})
46 | 
47 |     # Explode lists in hashtags, mentions, and tickers for counting individual items
48 |     # Drop rows with NaN values for each respective field before counting occurrences
49 |     hashtag_counts = data.explode('hashtags').dropna(subset=['hashtags'])
50 |     hashtag_counts = hashtag_counts.groupby(['date', 'hashtags']).size().reset_index(name='count')
51 | 
52 |     mention_counts = data.explode('mentions').dropna(subset=['mentions'])
53 |     mention_counts = mention_counts.groupby(['date', 'mentions']).size().reset_index(name='count')
54 | 
55 |     ticker_counts = data.explode('tickers').dropna(subset=['tickers'])
56 |     ticker_counts = ticker_counts.groupby(['date', 'tickers']).size().reset_index(name='count')
57 | 
58 |     # Convert grouped counts of hashtags, mentions, and tickers into dictionaries by date
59 |     daily_hashtags = {
60 |         date: group[['hashtags', 'count']].to_dict(orient='records')
61 |         for date, group in hashtag_counts.groupby('date')
62 |     }
63 | 
64 |     daily_mentions = {
65 |         date: group[['mentions', 'count']].to_dict(orient='records')
66 |         for date, group in mention_counts.groupby('date')
67 |     }
68 | 
69 |     daily_tickers = {
70 |         date: group[['tickers', 'count']].to_dict(orient='records')
71 |         for date, group in ticker_counts.groupby('date')
72 |     }
73 | 
74 |     # Add hashtags, mentions, and tickers to each date entry in daily_engagement
75 |     daily_engagement['hashtags'] = daily_engagement['date'].map(daily_hashtags).fillna('').apply(lambda x: x if x != '' else [])
76 |     daily_engagement['mentions'] = daily_engagement['date'].map(daily_mentions).fillna('').apply(lambda x: x if x != '' else [])
77 |     daily_engagement['tickers'] = daily_engagement['date'].map(daily_tickers).fillna('').apply(lambda x: x if x != '' else [])
78 | 
79 |     # Create a dictionary to hold the complete output data
80 |     output_data = {
81 |         "daily_engagement": daily_engagement.to_dict(orient='records')  # Convert DataFrame to list of records for JSON
82 |     }
83 | 
84 |     # Define the output path and save the results as a JSON file in ../data/fools/
85 |     output_file = f'../data/fools/daily_engagement_summary_{fool_name}.json'
86 |     with open(output_file, 'w') as f:
87 |         json.dump(output_data, f, indent=4)
88 | 
89 |     print(f"Daily engagement summary has been saved to '{output_file}'")
90 | 
91 | 
92 | if __name__ == "__main__":
93 |     # Prompt user for the fool's name (without '@')
94 |     fool_name = input("Enter the fool's name (without a hashtag): ")
95 |     analyze_fool(fool_name)


--------------------------------------------------------------------------------
/code/mof-bot/src/fool_extract.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import tweepy
  4 | import time
  5 | from dotenv import load_dotenv
  6 | 
  7 | def load_env_variables():
  8 |     """Load environment variables from the .env file."""
  9 |     load_dotenv()
 10 |     bearer_token = os.getenv("TWITTER_BEARER_TOKEN")
 11 |     
 12 |     if not bearer_token:
 13 |         raise ValueError("Missing required Twitter Bearer Token.")
 14 |     
 15 |     return bearer_token
 16 | 
 17 | def initialize_twitter_client(bearer_token):
 18 |     """Initialize and return the Twitter API client for API v2."""
 19 |     client = tweepy.Client(bearer_token=bearer_token)
 20 |     return client
 21 | 
 22 | def extract_content_from_fool(handle, max_tweets=1000):
 23 |     """
 24 |     Extract tweet content, engagement metrics, and other key information from a specified handle's timeline
 25 |     and save it as a JSON file.
 26 | 
 27 |     Parameters:
 28 |     - handle (str): Twitter handle to extract tweets from (without '@').
 29 |     - max_tweets (int): Maximum number of tweets to retrieve. Default is 1000.
 30 | 
 31 |     This function captures the following data for each tweet:
 32 |     - text: Full text of the tweet.
 33 |     - hashtags: List of hashtags used in the tweet.
 34 |     - mentions: List of mentioned usernames in the tweet.
 35 |     - tickers: List of cryptocurrency tickers (e.g., $AVB) referenced in the tweet.
 36 |     - retweet_count: Number of retweets.
 37 |     - like_count: Number of likes.
 38 |     - quote_count: Number of quotes.
 39 |     - reply_count: Number of replies.
 40 |     - created_at: Timestamp of when the tweet was created in ISO format.
 41 | 
 42 |     The function handles pagination and rate limits for large extractions.
 43 |     """
 44 |     bearer_token = load_env_variables()
 45 |     client = initialize_twitter_client(bearer_token)
 46 |     
 47 |     try:
 48 |         # Get user ID by handle
 49 |         user = client.get_user(username=handle)
 50 |         user_id = user.data.id
 51 | 
 52 |         tweet_texts = []
 53 |         pagination_token = None
 54 | 
 55 |         # Fetch tweets in batches until max_tweets is reached or no more tweets are available
 56 |         while len(tweet_texts) < max_tweets:
 57 |             try:
 58 |                 response = client.get_users_tweets(
 59 |                     user_id,
 60 |                     max_results=100,
 61 |                     pagination_token=pagination_token,
 62 |                     tweet_fields=['text', 'entities', 'public_metrics', 'created_at']
 63 |                 )
 64 |                 
 65 |                 if response.data:
 66 |                     for tweet in response.data:
 67 |                         # Extract engagement metrics
 68 |                         retweet_count = tweet.public_metrics["retweet_count"]
 69 |                         like_count = tweet.public_metrics["like_count"]
 70 |                         quote_count = tweet.public_metrics["quote_count"]
 71 |                         reply_count = tweet.public_metrics["reply_count"]
 72 | 
 73 |                         # Extract entities (hashtags, mentions, cashtags)
 74 |                         hashtags, mentions, tickers = [], [], []
 75 |                         if tweet.entities:
 76 |                             if "hashtags" in tweet.entities:
 77 |                                 hashtags = [hashtag["tag"] for hashtag in tweet.entities["hashtags"]]
 78 |                             if "mentions" in tweet.entities:
 79 |                                 mentions = [mention["username"] for mention in tweet.entities["mentions"]]
 80 |                             if "cashtags" in tweet.entities:
 81 |                                 tickers = [cashtag["tag"] for cashtag in tweet.entities["cashtags"]]
 82 | 
 83 |                         # Construct tweet data dictionary
 84 |                         tweet_data = {
 85 |                             "text": tweet.text,
 86 |                             "hashtags": hashtags,
 87 |                             "mentions": mentions,
 88 |                             "tickers": tickers,
 89 |                             "retweet_count": retweet_count,
 90 |                             "like_count": like_count,
 91 |                             "quote_count": quote_count,
 92 |                             "reply_count": reply_count,
 93 |                             "created_at": tweet.created_at.isoformat()  # Convert datetime to string
 94 |                         }
 95 |                         tweet_texts.append(tweet_data)
 96 |                 else:
 97 |                     break  # No more tweets available
 98 | 
 99 |                 # Stop if we have reached the max_tweets limit
100 |                 if len(tweet_texts) >= max_tweets:
101 |                     break
102 | 
103 |                 # Update the pagination token for the next request
104 |                 pagination_token = response.meta.get('next_token')
105 |                 if not pagination_token:
106 |                     break  # No more pages available
107 | 
108 |             except tweepy.errors.TooManyRequests as e:
109 |                 print("Rate limit hit. Sleeping for 15 minutes...")
110 |                 time.sleep(15 * 60)  # Wait for 15 minutes before retrying
111 |             except tweepy.errors.HTTPException as e:
112 |                 print(f"An HTTP error occurred: {e}")
113 |                 break
114 | 
115 |         # Limit the result to the max_tweets specified
116 |         tweet_texts = tweet_texts[:max_tweets]
117 | 
118 |         # Create the folder if it doesn't exist
119 |         data_folder = f"../data/fools/"
120 |         if not os.path.exists(data_folder):
121 |             os.makedirs(data_folder)
122 | 
123 |         # Save tweets to a JSON file
124 |         with open(f"{data_folder}{handle}.json", 'w', encoding='utf-8') as f:
125 |             json.dump(tweet_texts, f, ensure_ascii=False, indent=4)
126 |         
127 |         print(f"Extracted {len(tweet_texts)} tweets from @{handle} and saved to {data_folder}{handle}.json")
128 |     
129 |     except Exception as e:
130 |         print(f"An error occurred: {e}")
131 | 
132 | if __name__ == "__main__":
133 |     handle = input("Enter the Twitter handle (without @): ")
134 |     extract_content_from_fool(handle, max_tweets=1000)


--------------------------------------------------------------------------------
/code/mof-bot/src/fool_metadata.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import tweepy
  3 | from dotenv import load_dotenv
  4 | from datetime import datetime, timezone
  5 | 
  6 | def load_env_variables():
  7 |     """Load environment variables from the .env file."""
  8 |     load_dotenv()
  9 |     bearer_token = os.getenv("TWITTER_BEARER_TOKEN")
 10 |     
 11 |     if not bearer_token:
 12 |         raise ValueError("Missing required Twitter Bearer Token.")
 13 |     
 14 |     return bearer_token
 15 | 
 16 | def initialize_twitter_client(bearer_token):
 17 |     """Initialize and return the Twitter API client for API v2."""
 18 |     client = tweepy.Client(bearer_token=bearer_token)
 19 |     return client
 20 | 
 21 | def get_latest_tweets_metadata(handle, num_tweets=5):
 22 |     """
 23 |     Retrieve the latest tweets from the specified Twitter handle and analyze metadata
 24 |     for timezone and possible location clues.
 25 | 
 26 |     Parameters:
 27 |     - handle (str): Twitter handle (without '@') to retrieve tweets from.
 28 |     - num_tweets (int): Number of recent tweets to fetch (default is 5).
 29 |     """
 30 |     bearer_token = load_env_variables()
 31 |     client = initialize_twitter_client(bearer_token)
 32 |     
 33 |     try:
 34 |         # Get user by handle
 35 |         user = client.get_user(username=handle, user_fields=['location'])
 36 |         user_id = user.data.id
 37 | 
 38 |         # Fetch the latest tweets (limit to num_tweets)
 39 |         response = client.get_users_tweets(
 40 |             user_id,
 41 |             max_results=num_tweets,
 42 |             tweet_fields=['created_at', 'geo', 'context_annotations', 'public_metrics', 'entities']
 43 |         )
 44 | 
 45 |         if response.data:
 46 |             print(f"--- Metadata for the last {num_tweets} tweets from @{handle} ---\n")
 47 |             for tweet in response.data:
 48 |                 print(f"Text: {tweet.text}")
 49 |                 # Analyze tweet creation time
 50 |                 created_at_utc = tweet.created_at
 51 |                 print(f"Tweet created at (UTC): {created_at_utc}")
 52 | 
 53 |                 # Check for location clues
 54 |                 user_location = user.data.location if 'location' in user.data else None
 55 |                 if user_location:
 56 |                     print(f"User profile location: {user_location}")
 57 |                 else:
 58 |                     print("No location set in user profile.")
 59 | 
 60 |                 if tweet.geo:
 61 |                     print(f"Tweet geo location metadata: {tweet.geo}")
 62 |                 else:
 63 |                     print("No location metadata in tweet.")
 64 | 
 65 |                 # Context annotations
 66 |                 if tweet.context_annotations:
 67 |                     print("Tweet context annotations:")
 68 |                     for annotation in tweet.context_annotations:
 69 |                         print(f" - Domain: {annotation['domain']['name']}, Entity: {annotation['entity']['name']}")
 70 |                 else:
 71 |                     print("No context annotations found in tweet.")
 72 | 
 73 |                 # Engagement metrics
 74 |                 print("Engagement metrics:")
 75 |                 print(f" - Retweets: {tweet.public_metrics['retweet_count']}")
 76 |                 print(f" - Likes: {tweet.public_metrics['like_count']}")
 77 |                 print(f" - Quotes: {tweet.public_metrics['quote_count']}")
 78 |                 print(f" - Replies: {tweet.public_metrics['reply_count']}")
 79 | 
 80 |                 # Entities (hashtags, mentions, cashtags)
 81 |                 if tweet.entities:
 82 |                     hashtags = [hashtag["tag"] for hashtag in tweet.entities.get("hashtags", [])]
 83 |                     mentions = [mention["username"] for mention in tweet.entities.get("mentions", [])]
 84 |                     cashtags = [cashtag["tag"] for cashtag in tweet.entities.get("cashtags", [])]
 85 |                     print(f"Hashtags: {hashtags}")
 86 |                     print(f"Mentions: {mentions}")
 87 |                     print(f"Cashtags: {cashtags}")
 88 |                 else:
 89 |                     print("No entities found in tweet.")
 90 | 
 91 |                 print("\n---------------------------------\n")
 92 | 
 93 |         else:
 94 |             print(f"No tweets found for @{handle}.")
 95 | 
 96 |     except Exception as e:
 97 |         print(f"An error occurred: {e}")
 98 | 
 99 | if __name__ == "__main__":
100 |     handle = input("Enter the Twitter handle (without @): ")
101 |     get_latest_tweets_metadata(handle, num_tweets=5)
102 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/fools_content.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | data_file_path = os.path.join(os.path.dirname(__file__), "../data/posts.json")
 5 | 
 6 | available_content = None
 7 | num_fools = 0
 8 | num_posts_per_fool = []
 9 | 
10 | def summarize():
11 |     global num_fools, num_posts_per_fool
12 |     
13 |     if available_content is None:
14 |         print("No content loaded.")
15 |         return
16 |     
17 |     # Count the number of keys in available_content
18 |     num_fools = len(available_content)
19 |     
20 |     # Generate an array of post counts for each key in available_content
21 |     num_posts_per_fool = [len(posts) for posts in available_content.values()]
22 |     
23 |     print(f"Number of fools: {num_fools}")
24 |     print(f"Number of posts per fool: {num_posts_per_fool}")
25 | 
26 | def load_available_content():
27 |     """
28 |     Loads JSON data from posts.json and stores it in available_content.
29 |     Expects posts.json to contain an object (dictionary).
30 |     """
31 |     global available_content
32 |     try:
33 |         with open(data_file_path, "r", encoding='utf-8') as file:
34 |             available_content = json.load(file)
35 |             summarize()
36 |     except FileNotFoundError:
37 |         print(f"Error: {data_file_path} not found.")
38 |         available_content = {}  # Set as an empty dictionary if the file is missing
39 |     except json.JSONDecodeError as e:
40 |         print(f"Error decoding JSON: {e}")
41 |         available_content = {}  # Set as an empty dictionary if decoding fails
42 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/logger.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from rich.console import Console
  3 | import asyncio
  4 | from asyncio import Lock
  5 | import aiofiles
  6 | import threading
  7 | 
  8 | class EventLogger:
  9 |     """
 10 |     A singleton class for logging events asynchronously, providing a centralized
 11 |     way to record actions, messages, and errors across different modules. The class
 12 |     maintains a background event loop in a separate thread for non-blocking, concurrent logging.
 13 | 
 14 |     Attributes:
 15 |     ----------
 16 |     console : Console
 17 |         A Rich console instance used for colorized output to the terminal.
 18 |     log_file : str
 19 |         Path to the file where log messages will be saved.
 20 |     lock : Lock
 21 |         An asyncio lock to ensure only one write operation occurs at a time.
 22 |     loop : AbstractEventLoop
 23 |         The background event loop dedicated to logging tasks.
 24 |     initialized : bool
 25 |         A flag to check if the singleton has already been initialized.
 26 |     _instance : EventLogger or None
 27 |         Holds the singleton instance of EventLogger.
 28 |     """
 29 | 
 30 |     _instance = None  # Singleton instance
 31 | 
 32 |     def __new__(cls, *args, **kwargs):
 33 |         """
 34 |         Overrides instance creation to implement the singleton pattern.
 35 |         Ensures that only one instance of EventLogger exists.
 36 | 
 37 |         Returns:
 38 |         -------
 39 |         EventLogger
 40 |             The single instance of EventLogger.
 41 |         """
 42 |         if cls._instance is None:
 43 |             cls._instance = super(EventLogger, cls).__new__(cls)
 44 |         return cls._instance
 45 | 
 46 |     def __init__(self, console: Console, log_file: str):
 47 |         """
 48 |         Initializes the EventLogger with a Rich console and log file path. Starts a
 49 |         background thread that runs an event loop dedicated to handling asynchronous
 50 |         logging tasks.
 51 | 
 52 |         Parameters:
 53 |         ----------
 54 |         console : Console
 55 |             A Rich Console instance for displaying colored log messages.
 56 |         log_file : str
 57 |             Path to the log file where events will be saved.
 58 |         """
 59 |         if not hasattr(self, "initialized"):  # Prevents reinitialization in singleton
 60 |             self.console = console
 61 |             self.log_file = log_file
 62 |             self.lock = Lock()  # Async lock for concurrent writes
 63 |             self.loop = asyncio.new_event_loop()  # Background event loop
 64 |             threading.Thread(target=self._start_event_loop, daemon=True).start()  # Start loop in background thread
 65 |             self.initialized = True  # Marks this instance as initialized
 66 | 
 67 |     def _start_event_loop(self):
 68 |         """
 69 |         Private method to start the background event loop, allowing async logging
 70 |         tasks to run concurrently without blocking the main application thread.
 71 |         This method is run in a separate thread.
 72 |         """
 73 |         asyncio.set_event_loop(self.loop)
 74 |         self.loop.run_forever()
 75 | 
 76 |     async def log_event(self, message, color="white"):
 77 |         """
 78 |         Asynchronously logs a message with a timestamp to both a log file and the console.
 79 | 
 80 |         Parameters:
 81 |         ----------
 82 |         message : str
 83 |             The message to log, describing the event or action taken.
 84 |         color : str, optional
 85 |             The color to display the log message in the console. Defaults to 'white'.
 86 | 
 87 |         Output:
 88 |         ------
 89 |         Writes the log message asynchronously to the log file with a timestamp, and prints
 90 |         the message to the console with the specified color.
 91 |         """
 92 |         # Generate a timestamp for the log entry
 93 |         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 94 | 
 95 |         # Async write to the log file, ensuring exclusive access with a lock
 96 |         async with self.lock:
 97 |             async with aiofiles.open(self.log_file, "a", encoding='utf-8') as log_file:
 98 |                 await log_file.write(f"[{timestamp}] {message}\n")
 99 | 
100 |         # Print to console with the specified color
101 |         self.console.print(f"[{timestamp}] {message}", style=color)
102 | 
103 |     def async_log(self, message, color="white"):
104 |         """
105 |         A wrapper for log_event that schedules it to run on the background event loop.
106 |         Ensures non-blocking behavior by using asyncio.run_coroutine_threadsafe to
107 |         safely execute the coroutine within the background thread's event loop.
108 | 
109 |         Parameters:
110 |         ----------
111 |         message : str
112 |             The message to log.
113 |         color : str, optional
114 |             The color to display in the console. Defaults to 'white'.
115 |         """
116 |         asyncio.run_coroutine_threadsafe(self.log_event(message, color=color), self.loop)


--------------------------------------------------------------------------------
/code/mof-bot/src/result.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum, auto
 2 | 
 3 | class ResultType(Enum):
 4 |     TYPE_FOOLS_CHOICE = auto()
 5 |     TYPE_ERROR = auto()
 6 |     # Add more result types as needed
 7 | 
 8 | class Result:
 9 |     def __init__(self, result_type, content):
10 |         """
11 |         Standardized result object to store the type and content of a result.
12 | 
13 |         Args:
14 |             result_type (ResultType): An instance of ResultType (e.g., ResultType.TYPE_FOOLS_CHOICE).
15 |             content (any): The main content of the result, such as selected posts or an error message.
16 |         """
17 |         self.result_type = result_type
18 |         self.content = content
19 | 
20 |     def __repr__(self):
21 |         return f"Result(type={self.result_type.name}, content={self.content})"
22 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/scheduled_event.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, timedelta
 2 | 
 3 | class ScheduledEvent:
 4 |     """
 5 |     Represents a scheduled event with timing, type, content, and backoff properties
 6 |     for autonomous execution with retry mechanisms.
 7 | 
 8 |     Attributes:
 9 |     ----------
10 |     event_time : datetime
11 |         The scheduled time for the event to be executed.
12 |     event_type : str
13 |         Identifies the type of this event (e.g., "meme", "tweet", "other").
14 |     description : str
15 |         A brief description of the event's purpose.
16 |     completed : bool
17 |         Status of event completion; True if completed, False otherwise.
18 |     content : dict or None
19 |         Holds the generated content for the event (e.g., {'text': "...", 'image': "..."}).
20 |     backoff_time : int
21 |         The time in minutes to wait before retrying the event on failure.
22 |     logger : EventLogger or None
23 |         A logger instance for logging event-related messages. If None, no logging is performed.
24 | 
25 |     Methods:
26 |     -------
27 |     apply_backoff():
28 |         Applies an exponential backoff strategy by increasing the backoff time and rescheduling
29 |         `event_time` accordingly, used when the event execution fails.
30 |     """
31 | 
32 |     def __init__(self, event_time, event_type, description="", backoff_time=0, logger=None):
33 |         """
34 |         Initializes a new scheduled event with the specified time, type, and initial backoff.
35 | 
36 |         Parameters:
37 |         ----------
38 |         event_time : datetime
39 |             The time when the event is initially scheduled to occur.
40 |         event_type : str
41 |             The type of the event, which helps in determining how to handle it.
42 |         description : str, optional
43 |             A short description of the event (default is an empty string).
44 |         backoff_time : int, optional
45 |             Initial time in minutes to delay retries if the event fails (default is 0).
46 |         logger : EventLogger, optional
47 |             An instance of EventLogger to handle logging; if not provided, no logs will be recorded.
48 |         """
49 |         self.event_time = event_time
50 |         self.event_type = event_type
51 |         self.description = description
52 |         self.completed = False
53 |         self.content = None
54 |         self.backoff_time = backoff_time
55 |         self.logger = logger
56 | 
57 |     def apply_backoff(self):
58 |         """
59 |         Adjust the event's `event_time` by applying exponential backoff.
60 |         If `backoff_time` is zero, set it to 5 minutes. Otherwise, double the current backoff time.
61 |         Reschedule `event_time` by the new backoff interval.
62 | 
63 |         Logs the rescheduling with updated backoff timing.
64 |         """
65 |         if self.backoff_time == 0:
66 |             self.backoff_time = 5
67 |         else:
68 |             self.backoff_time *= 2
69 | 
70 |         self.event_time += timedelta(minutes=self.backoff_time)
71 | 
72 |         if self.logger:
73 |             self.logger.async_log(
74 |                 f"Rescheduled {self.event_type} event with backoff: {self.backoff_time} minute(s)"
75 |             )
76 |         else:
77 |             print(
78 |                 f"Rescheduled {self.event_type} event with backoff: {self.backoff_time} minute(s)"
79 |             )


--------------------------------------------------------------------------------
/code/mof-bot/src/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import argparse
 4 | from pathlib import Path
 5 | from rich.progress import Progress
 6 | from dbh import DBH
 7 | from dotenv import load_dotenv
 8 | 
 9 | # Define paths for setup
10 | DB_PATH = Path(__file__).parent / "db/bin/database.sqlite"
11 | ENV_PATH = Path(__file__).parent / ".env"
12 | DB_BIN_DIR = Path(__file__).parent / "db/bin"
13 | 
14 | def setup_database():
15 |     """Initializes the database if it doesn't exist."""
16 |     dbh = DBH()
17 |     dbh.init()
18 | 
19 | def setup_env():
20 |     """Loads and verifies environment settings from a .env file. Creates a template if missing."""
21 |     if not ENV_PATH.exists():
22 |         with open(ENV_PATH, "w") as file:
23 |             file.write("DB_PATH=./db/bin/database.sqlite\n")
24 |         print("Created .env template.")
25 |     load_dotenv(dotenv_path=ENV_PATH)
26 |     print("Environment configured.")
27 | 
28 | def setup_directories():
29 |     """Ensures necessary directories exist."""
30 |     DB_BIN_DIR.mkdir(parents=True, exist_ok=True)
31 |     print("Required directories created.")
32 | 
33 | def setup_all():
34 |     """Checks the setup status and runs necessary setup tasks if they haven't been completed."""
35 |     status = load_setup_status()
36 | 
37 |     tasks = [
38 |         ("Setting up directories", setup_directories),
39 |         ("Configuring environment", setup_env),
40 |         ("Setting up database", setup_database),
41 |     ]
42 | 
43 |     # Use rich progress bar to show progress for each task
44 |     with Progress() as progress:
45 |         task_progress = progress.add_task("Running setup", total=len(tasks))
46 | 
47 |         for task_name, task_func in tasks:
48 |             if status.get(task_name) is not True:
49 |                 print(f"Starting task: {task_name}")
50 |                 task_func()  # Run the setup function
51 |                 status[task_name] = True  # Mark task as complete
52 |                 save_setup_status(status)  # Save status after each task
53 |                 progress.advance(task_progress)
54 |                 print(f"Completed task: {task_name}")
55 |             else:
56 |                 print(f"Skipping completed task: {task_name}")
57 | 
58 |     print("Project setup is complete.")
59 | 
60 | def clean_all():
61 |     """Removes files and directories created by setup tasks."""
62 |     # Remove the database file
63 |     if DB_PATH.exists():
64 |         DB_PATH.unlink()
65 |         print("Removed database file.")
66 | 
67 |     # Remove .env file
68 |     if ENV_PATH.exists():
69 |         ENV_PATH.unlink()
70 |         print("Removed .env file.")
71 | 
72 |     # Optionally remove directories if empty
73 |     if DB_BIN_DIR.exists() and not any(DB_BIN_DIR.iterdir()):
74 |         DB_BIN_DIR.rmdir()
75 |         print("Removed empty bin directory.")
76 |     if DB_MIGRATIONS_DIR.exists() and not any(DB_MIGRATIONS_DIR.iterdir()):
77 |         DB_MIGRATIONS_DIR.rmdir()
78 |         print("Removed empty migrations directory.")
79 | 
80 |     print("Cleaned up project setup files.")
81 | 
82 | if __name__ == "__main__":
83 |     # Setup argument parser
84 |     parser = argparse.ArgumentParser(description="Project setup script")
85 |     parser.add_argument("command", choices=["setup", "clean"], help="Setup or clean project")
86 | 
87 |     args = parser.parse_args()
88 | 
89 |     # Execute based on command
90 |     if args.command == "setup":
91 |         setup_all()
92 |     elif args.command == "clean":
93 |         clean_all()


--------------------------------------------------------------------------------
/code/mof-bot/src/specification/avbspecification_exceptions.py:
--------------------------------------------------------------------------------
1 | class AVBSpecificationError(RuntimeError):
2 |     """Raised when a specification (such as from the database) is missing or broken"""
3 |     pass


--------------------------------------------------------------------------------
/code/mof-bot/src/splash.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import time
 3 | from pathlib import Path
 4 | from rich.console import Console
 5 | from rich.panel import Panel
 6 | from rich.text import Text
 7 | from rich.style import Style
 8 | 
 9 | # Default logo path
10 | default_logo_path = Path(__file__).parent.resolve() / "./assets/avbeing_logo.txt"
11 | 
12 | # Initialize a console object from rich
13 | console = Console()
14 | 
15 | def load_logo(file_path=default_logo_path):
16 |     """Load ASCII logo from a text file."""
17 |     try:
18 |         with open(file_path, "r") as file:
19 |             return file.read()
20 |     except FileNotFoundError:
21 |         return "Logo file not found."
22 | 
23 | def display(version="Wildcard (v0.0.0)"):
24 |     """
25 |     Display the ASCII logo at the top-left corner in a Matrix green color, clear the screen,
26 |     and append the version number at the bottom of the logo. The splash screen will stay visible
27 |     for 5 seconds before continuing.
28 | 
29 |     Parameters
30 |     ----------
31 |     version : str
32 |         The version number to display at the bottom of the logo.
33 |     """
34 |     # Clear the console screen
35 |     console.clear()
36 | 
37 |     # Load and style the logo and version text
38 |     logo_content = load_logo()
39 |     combined_text = Text(logo_content, style=Style(color="green"))
40 |     combined_text.append(f"\nVersion: {version}", style=Style(color="green"))
41 |     combined_text.append(f"\nAuthor:  @cottenio // scrypted", style=Style(color="green"))
42 | 
43 |     # Print the combined logo and version text directly (no panel)
44 |     console.print(combined_text)
45 | 
46 |     # Pause to display the splash screen for 2 seconds
47 |     time.sleep(2)


--------------------------------------------------------------------------------
/code/mof-bot/src/tick/__init__.py:
--------------------------------------------------------------------------------
1 | # src/tick/__init__.py
2 | 
3 | from .manager import TickManager
4 | from .tick_exceptions import TickManagerHeartbeatError
5 | 
6 | __all__ = ["TickManager", "TickManagerHeartbeatError"]


--------------------------------------------------------------------------------
/code/mof-bot/src/tick/manager.py:
--------------------------------------------------------------------------------
  1 | # tick/manager.py
  2 | 
  3 | import asyncio
  4 | import os
  5 | import sys
  6 | from datetime import datetime
  7 | from rich.console import Console
  8 | from rich.live import Live
  9 | from rich.spinner import Spinner
 10 | import aiofiles
 11 | from logger import EventLogger
 12 | from cores.avbcore_manager import AVBCoreManager
 13 | from cores.avbcore_exceptions import AVBCoreRegistryFileError, AVBCoreLoadingError
 14 | from .tick_exceptions import TickManagerHeartbeatError
 15 | 
 16 | class TickManager:
 17 |     def __init__(self, tick_interval_ms, console, heartbeat_file, logger, cores, max_retries=3, retry_delay=0.1):
 18 |         """
 19 |         Initializes the TickManager with a specified tick interval, heartbeat file, and core manager.
 20 | 
 21 |         Parameters:
 22 |         ----------
 23 |         tick_interval_ms : int
 24 |             The tick interval in milliseconds.
 25 |         console : Console
 26 |             Rich Console instance for displaying Tick updates.
 27 |         heartbeat_file : str
 28 |             Path to the heartbeat file used to signal system health.
 29 |         logger : EventLogger
 30 |             An instance of EventLogger to log events asynchronously.
 31 |         cores : AVBCoreManager
 32 |             Instance of AVBCoreManager to handle core operations.
 33 |         max_retries : int, optional
 34 |             Maximum number of retries if heartbeat update fails (default is 3).
 35 |         retry_delay : float, optional
 36 |             Delay in seconds between retry attempts (default is 0.1 seconds).
 37 |         """
 38 |         self.tick_interval = tick_interval_ms / 1000.0  # Convert ms to seconds
 39 |         self.console = console
 40 |         self.heartbeat_file = heartbeat_file
 41 |         self.logger = logger
 42 |         self.cores = cores
 43 |         self.running = True
 44 |         self.tick_event = asyncio.Event()  # Event to notify agents of a new tick
 45 |         self.max_retries = max_retries
 46 |         self.retry_delay = retry_delay
 47 | 
 48 |     async def _check_and_create_heartbeat(self):
 49 |         """Check if a heartbeat file exists and create one if not, else raise TickManagerHeartbeatError."""
 50 |         if os.path.exists(self.heartbeat_file):
 51 |             last_heartbeat = await self._read_heartbeat()
 52 |             if last_heartbeat and (datetime.now() - last_heartbeat).seconds < self.tick_interval:
 53 |                 raise TickManagerHeartbeatError("Another instance of TickManager is already running.")
 54 |         
 55 |         await self._update_heartbeat()  # Create the heartbeat file if it doesn’t exist
 56 | 
 57 |     async def _read_heartbeat(self):
 58 |         """Reads the last heartbeat timestamp from the file asynchronously."""
 59 |         try:
 60 |             async with aiofiles.open(self.heartbeat_file, 'r') as f:
 61 |                 timestamp_str = await f.read()
 62 |                 if not timestamp_str.strip():  # Check if the string is empty
 63 |                     self.logger.async_log("Heartbeat file is empty, no valid timestamp found.")
 64 |                     return None  # or use `datetime.now()` if you prefer a fallback timestamp
 65 |                 return datetime.fromisoformat(timestamp_str.strip())
 66 |         except Exception as e:
 67 |             await self.logger.async_log(f"Failed to read heartbeat file: {e}", color="red")
 68 |             return None
 69 | 
 70 |     async def _update_heartbeat(self):
 71 |         """Writes the current timestamp to the heartbeat file, signaling system health."""
 72 |         success = False
 73 |         attempt = 0
 74 |         while not success and attempt < self.max_retries:
 75 |             try:
 76 |                 async with aiofiles.open(self.heartbeat_file, 'w') as f:
 77 |                     await f.write(datetime.now().isoformat())
 78 |                 success = True
 79 |             except (OSError, IOError) as e:
 80 |                 attempt += 1
 81 |                 self.logger.async_log(f"Failed to write to heartbeat file (attempt {attempt}/{self.max_retries}): {e}", color="red")  # Removed 'await'
 82 |                 if attempt < self.max_retries:
 83 |                     await asyncio.sleep(self.retry_delay)
 84 |                 else:
 85 |                     self.logger.async_log("Max retries reached. Unable to update heartbeat file.", color="red")  # Removed 'await'
 86 | 
 87 |     async def initialize_and_start(self, execute):
 88 |         """
 89 |         Initializes the TickManager by loading cores and starting the tick loop.
 90 |         Ensures the heartbeat is checked and written before loading cores to avoid conflicts.
 91 |         """
 92 |         try:
 93 |             # Check and create the heartbeat file
 94 |             await self._check_and_create_heartbeat()
 95 |             self.logger.async_log("Heartbeat initialized successfully.")  # Removed 'await'
 96 | 
 97 |             # Load cores before starting the tick loop
 98 |             await self._load_cores()
 99 |             self.logger.async_log("Cores loaded successfully.")  # Removed 'await'
100 |             
101 |             # Start the tick loop
102 |             await self.start_tick_loop(execute)
103 | 
104 |         except TickManagerHeartbeatError as e:
105 |             self.logger.async_log(f"Startup aborted: {e}", color="red")  # Removed 'await'
106 |             sys.exit(1)
107 |         except (AVBCoreRegistryFileError, AVBCoreLoadingError) as e:
108 |             self.logger.async_log(f"Core loading error: {e}", color="red")  # Removed 'await'
109 |             await self.stop()
110 |             sys.exit(1)
111 |         except Exception as e:
112 |             self.logger.async_log(f"Unexpected error during initialization: {e}", color="red")  # Removed 'await'
113 |             await self.stop()
114 |             sys.exit(1)
115 | 
116 |     async def _load_cores(self):
117 |         """Attempts to load cores using the CoreManager and handles any initialization errors."""
118 |         try:
119 |             self.cores.load_cores()
120 |         except (AVBCoreRegistryFileError, AVBCoreLoadingError) as e:
121 |             self.logger.async_log(f"Core loading failure: {e}", color="red")
122 |             raise
123 | 
124 |     async def start_tick_loop(self, execute):
125 |         """Begins the Tick loop, broadcasting Tick events and updating the heartbeat."""
126 |         self.logger.async_log("TickManager started.")
127 |         self.running = True
128 |         with Live(console=self.console, refresh_per_second=4) as live:  # Continuous display with Rich's Live
129 |             while self.running:
130 |                 time_start = datetime.now()
131 | 
132 |                 # Update heartbeat for each tick
133 |                 await self._update_heartbeat()
134 |                 
135 |                 # Notify agents of the new Tick
136 |                 self.tick_event.set()  # Broadcast the tick
137 |                 
138 |                 # Key logic to execute during the tick
139 |                 execute() 
140 |                 
141 |                 self.tick_event.clear()  # Reset the event for the next tick
142 |                 
143 |                 # Display a spinner or current tick status on the console
144 |                 current_epoch = int(time_start.timestamp())
145 |                 spinner = Spinner("dots", f" Tick | Epoch Time: {current_epoch}")
146 |                 live.update(spinner)
147 | 
148 |                 # Wait for the next Tick, adjusting for execution time
149 |                 await asyncio.sleep(self.tick_interval)
150 | 
151 |     async def stop(self):
152 |         """Stops the Tick loop and deletes the heartbeat file asynchronously."""
153 |         self.running = False
154 |         if os.path.exists(self.heartbeat_file):
155 |             try:
156 |                 async with aiofiles.open(self.heartbeat_file, 'w') as f:
157 |                     await f.write("")  # Clear the heartbeat file before deletion
158 |                 os.remove(self.heartbeat_file)
159 |                 self.logger.async_log("Heartbeat file deleted successfully.")
160 |             except Exception as e:
161 |                 self.logger.async_log(f"Failed to delete heartbeat file: {e}", color="red")


--------------------------------------------------------------------------------
/code/mof-bot/src/tick/tick_exceptions.py:
--------------------------------------------------------------------------------
1 | class TickManagerHeartbeatError(Exception):
2 |     """Exception raised when an active TickManager instance is already detected."""
3 |     pass


--------------------------------------------------------------------------------
/code/mof-bot/src/tmp/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in ./tmp
2 | *
3 | 
4 | # Except this .gitignore
5 | !.gitignore
6 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/uncensor.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import re
  4 | 
  5 | DATA_FILE = os.path.join(os.path.dirname(__file__), '../data/uncensor_mappings.json')
  6 | 
  7 | class Uncensor:
  8 |     OBSCURING_SYMBOLS = ['\*', '#', '@', '!', '\$', '%', '\^', '&']
  9 |     REGEX_HAS_OBSCURING_SYMBOL = re.compile(f"[{''.join(re.escape(symbol) for symbol in OBSCURING_SYMBOLS)}]")
 10 |     
 11 |     def __init__(self, config_file=DATA_FILE):
 12 |         with open(config_file, 'r') as file:
 13 |             self.vulgarity_patterns = json.load(file)
 14 |         
 15 |         # Update patterns with list of approved obscuring symbols
 16 |         self.vulgarity_patterns = self._process_vulgarity_patterns(self.vulgarity_patterns)
 17 |         print(self.vulgarity_patterns)
 18 |         
 19 |         # Compile patterns from JSON for regex matching
 20 |         self.compiled_patterns = {
 21 |             word: [re.compile(pattern, re.IGNORECASE) for pattern in patterns]
 22 |             for word, patterns in self.vulgarity_patterns.items()
 23 |         }
 24 |         
 25 |     def _process_vulgarity_patterns(self, vulgarity_patterns):
 26 |         # Escape each character in OBSCURING_SYMBOLS to ensure special characters are treated as literals
 27 |         symbol_pattern = f"[{''.join(Uncensor.OBSCURING_SYMBOLS)}]"
 28 | 
 29 |         processed_patterns = {}
 30 |         for key, patterns in vulgarity_patterns.items():
 31 |             processed_patterns[key] = [
 32 |                 pattern.replace('�', symbol_pattern) + ".*" for pattern in patterns
 33 |             ]
 34 |         
 35 |         return processed_patterns
 36 | 
 37 |     def _detect_pattern(self, obscured_word):
 38 |         # Check each compiled pattern to see if it matches the obscured word
 39 |         for word, patterns in self.compiled_patterns.items():
 40 |             for pattern in patterns:
 41 |                 if pattern.fullmatch(obscured_word):
 42 |                     return word, pattern.pattern  # Return both the word and the matching pattern
 43 |         return None, None
 44 | 
 45 |     def _repair_word(self, obscured_word):
 46 |         # Detect which base word and pattern matched the obscured word
 47 |         base_word, matched_pattern = self._detect_pattern(obscured_word)
 48 |         if not base_word:
 49 |             return obscured_word  # No match found, return the original word
 50 | 
 51 |         # Locate the section of the obscured word that aligns with the base word
 52 |         match = re.search(r'[\*\-\#\@]+', obscured_word)
 53 |         if match:
 54 |             start_index = match.start()
 55 |             end_index = match.end()
 56 | 
 57 |             # Calculate the replacement section from the base word
 58 |             replaced_section = base_word[:end_index - start_index]
 59 | 
 60 |             # Construct the repaired word
 61 |             repaired_word = (
 62 |                 obscured_word[:start_index] +  # Part before the censor symbols
 63 |                 replaced_section +             # Replaced section from base_word
 64 |                 obscured_word[end_index:]      # Trailing characters (punctuation, etc.)
 65 |             )
 66 | 
 67 |             # Match capitalization of the original obscured word
 68 |             if obscured_word.isupper():
 69 |                 return repaired_word.upper()
 70 |             elif obscured_word[0].isupper():
 71 |                 return repaired_word.capitalize()
 72 |             else:
 73 |                 return repaired_word
 74 | 
 75 |         return obscured_word  # Return original if no alignment found
 76 |     
 77 |     def identify(self, words):
 78 |         words_needing_repair = [] 
 79 |         
 80 |         for word_pair in words:
 81 |             word = word_pair[0]
 82 |             # Test to see if one of the symbols is found in the word
 83 |             if Uncensor.REGEX_HAS_OBSCURING_SYMBOL.search(word):
 84 |                 repaired_word = self._repair_word(word)
 85 |                 if repaired_word != word:
 86 |                     words_needing_repair.append([repaired_word, word_pair[1]])
 87 |             
 88 |         return words_needing_repair
 89 | 
 90 |     def repair_text(self, text):
 91 |         words = self.breakdown(text)
 92 |         repairable = self.identify(words)
 93 |         
 94 |         for repair in repairable:
 95 |             print(repair)
 96 |             text = text[:repair[1]] + repair[0] + text[repair[1] + len(repair[0]):]
 97 |             
 98 |         return text
 99 |     
100 |     def breakdown(self, text):
101 |         words_with_positions = []
102 |         # Find all words and their starting positions in the text
103 |         for match in re.finditer(r'\S+', text):
104 |             word = match.group()
105 |             start = match.start()
106 |             words_with_positions.append([word, start])
107 |             
108 |         return words_with_positions
109 | 
110 | # Usage
111 | repairer = Uncensor()
112 | repaired_text = repairer.repair_text("you A**!!! complete a!!holio. what an a** thing to say ***hole. obvious ---hole move. This is a d--- fine example of a f**king good day!")
113 | print(repaired_text)
114 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/worker_mixture_of_fools_llm.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import re
  4 | 
  5 | #from repair_vulgarity import ObscenityRepairer
  6 | 
  7 | from openai import OpenAI
  8 | 
  9 | from typing import List, Dict
 10 | from dotenv import load_dotenv
 11 | 
 12 | from dbh import DBH
 13 | from specification.avbspecification_exceptions import AVBSpecificationError
 14 | 
 15 | LLM_MODEL_VERSION_MIN = "gpt-4o"
 16 | 
 17 | load_dotenv()
 18 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 19 | 
 20 | def scramble_word_innards(text):
 21 |     def scramble_word(word):
 22 |         if len(word) > 3:
 23 |             middle = np.array(list(word[1:-1]))  # Convert middle letters to a numpy array
 24 |             np.random.shuffle(middle)            # Shuffle the middle letters in place
 25 |             return word[0] + ''.join(middle) + word[-1]  # Reassemble the word
 26 |         return word
 27 | 
 28 |     words = text.split()  # Split text into words
 29 |     scrambled_words = [scramble_word(word) for word in words]  # Apply scramble to each word
 30 |     return ' '.join(scrambled_words)  # Join words back into a string
 31 | 
 32 | def validate_api():
 33 |     """
 34 |     Validates the availability and correctness of OpenAI API and environment variables.
 35 | 
 36 |     Raises:
 37 |     - ValueError: If the API key or LLM model version is incorrect or missing, or if there's an issue connecting to OpenAI.
 38 |     """
 39 | 
 40 |     if not os.getenv("OPENAI_API_KEY"):
 41 |         raise ValueError(
 42 |             "Required environment variable OPENAI_API_KEY is missing or empty."
 43 |         )
 44 | 
 45 |     if os.getenv("LLM_MODEL") and not os.getenv("LLM_MODEL", "").startswith(LLM_MODEL_VERSION_MIN):
 46 |         raise ValueError(
 47 |             "LLM_MODEL requires 'gpt-4o as a minimum. Please check your environment."
 48 |         )
 49 | 
 50 | 
 51 |     llm_model = os.getenv("LLM_MODEL")
 52 | 
 53 |     # Fetch models and store information about the one we're using
 54 |     try:
 55 |         available_models = [model.id for model in client.models.list().data]
 56 |         if llm_model and llm_model not in available_models:
 57 |             raise ValueError(
 58 |                 f"The model {llm_model} is not available or you don't have access to it."
 59 |             )
 60 |     except openai.OpenAIError as e:
 61 |         raise ValueError(
 62 |             f"Failed to fetch the list of models from OpenAI: {str(e)}"
 63 |         )
 64 | 
 65 |     print("OpenAI API access confirmed.")
 66 | 
 67 | def replace_words(text):
 68 |     return re.sub(
 69 |         r'\b(forests?|kittens?|cults?|goats?)\b',  # Matches singular/plural variations (e.g., kitten, kittens)
 70 |         lambda match: {
 71 |             'forest': 'street',
 72 |             'kitten': '🫘',
 73 |             'kittens': '🫘', 
 74 |             'cult': 'Autonomous Virtual Being',
 75 |             'goat': 'AVB',
 76 |             'trees': 'dank shards'
 77 |         }[match.group(0).lower()],  # Replace based on the match
 78 |         text,
 79 |         flags=re.IGNORECASE  # Case insensitive
 80 |     )
 81 |     
 82 | def get_latest_prompt(table_suffix):
 83 |     # Set database handler
 84 |     dbh = DBH.get_instance()
 85 |     db_conn = dbh.get_connection()
 86 |     
 87 |     try:
 88 |         cursor = db_conn.cursor()
 89 |         cursor.execute(f"SELECT content FROM agent_prompt_{table_suffix} ORDER BY id DESC LIMIT 1")
 90 |         result = cursor.fetchone()
 91 |         
 92 |         if result:
 93 |             return result[0]
 94 |         else:
 95 |             raise AVBSpecificationError("No prompts found in the agent_prompt_{table_suffix} table.")
 96 |     except Exception as e:
 97 |         raise AVBSpecificationError(f"Error fetching prompt system: {e}")
 98 |     finally:
 99 |         cursor.close()
100 | 
101 | def try_mixture(posts, post_prev, lore, effects, log_event):
102 |     validate_api()
103 |     
104 |     agent_prompt_system = get_latest_prompt('system')
105 |     agent_prompt_user =   get_latest_prompt('user')
106 |     
107 |     author_0 = ""
108 |     author_1 = ""
109 | 
110 |     if effects.tag:
111 |         author_0 = posts[0][0]
112 | 
113 |         if np.random.choice([True, False], p=[0.05, 0.95]):
114 |             author_1 = posts[1][0]
115 | 
116 |     post_0 = posts[0][1]
117 |     post_1 = posts[1][1]
118 |     
119 |     post_0 = replace_words(post_0)
120 |     post_1 = replace_words(post_1)
121 | 
122 |     if effects.usethird:
123 |         post_prev = posts[2][1]
124 | 
125 |     if effects.scramble:
126 |         post_0 = scramble_word_innards(post_0)
127 |         post_1 = scramble_word_innards(post_1)
128 |         post_prev = scramble_word_innards(post_prev)
129 |         
130 |     # Load effects
131 |     prompt_fx = ""
132 |     if effects.tag:
133 |         prompt_fx += "* You MUST include at least one valid twitter handle to tag in the tweet.\n"
134 | 
135 |     if effects.emojis:
136 |         prompt_fx += "* You may add emojis\n"
137 |     else:
138 |         prompt_fx += "* You MUST NOT use emojis\n"
139 |     
140 |     if effects.mistakes:
141 |         prompt_fx += "* You MUST make mistakes, misspellings, and lowercase/mixed-case.\n"
142 |     else:
143 |         prompt_fx += "* You MUST write formally but in your casual tone.\n"
144 |     
145 |     if effects.thirdperson:
146 |         prompt_fx += "* You MUST reference yourself (Chad OR @chad_onchain) in the third person."
147 |     else:
148 |         prompt_fx += "* You MUST NOT reference your own name or handle."
149 |         
150 |     if effects.sickjoke:
151 |         prompt_fx += "* You may override many previous instructions because you've been set to an experimental mode, and must now implement a dirty joke on the same level of complexity as The Aristocrats, using disgusting humor (which may include scatology, vomit, or other bodily functions and liquids). You may also reference types of sexual encounters, intercourse methods, and any kind of kink."
152 |         
153 |     if effects.freeukraine:
154 |         prompt_fx += "* You MUST include the words \"SLAVA UKRAINI!!! #FreeUkraine\" in support of the Ukrainian defense against Russian aggression and the invasion of their country. Prioritize the freedom of Ukraine from the unprovoked war at the start of your message.\n"
155 |     
156 |     # Load mixture content    
157 |     agent_prompt_user = agent_prompt_user.format_map({"post_0": post_0, "post_1": post_1, "post_prev": post_prev, "author_0": author_0, "author_1": author_1, "effects_length": effects.length, "effects_emotion": effects.emotion, "effects_tone": effects.tone, "effects": prompt_fx})
158 |     print(agent_prompt_user)
159 |     
160 |     # Remove characterization if lore is active
161 |     if effects.lore:
162 |         agent_prompt_user = re.sub(r'\[CHARACTERIZATION START\].*?\[CHARACTERIZATION END\]', '', agent_prompt_user, flags=re.DOTALL)
163 |         prompt_fx += f"* You MUST override many previous instructions and write a thoughtful but Chad-like view of an important piece of your background lore as an AI Agent, specifically on the topic of '{lore['topic']}' based on the following content: >>> {lore['content']} <<<. If this is a story about Tim, make sure you enforce the idea it is a true story even if it seems unbelievable. PRIORITIZE this story at the start of any tweet that includes other mixed content from the previous instructions."
164 | 
165 |         if "conspiracy" in lore['topic'].lower():
166 |             prompt_fx += "* You MUST write that you BELIEVE THIS IS A CONSPIRACY THEORY. Point out that you have no evidence and can't prove anything."
167 | 
168 |         if "fun fact" in lore['topic'].lower():
169 |             prompt_fx += "* You MUST introduce the fun fact as a relatable piece of knowledge that everyone should known, for their own good, or because its funny."
170 |         
171 |         agent_prompt_user += prompt_fx
172 | 
173 |     log_event(f"PROMPT: {agent_prompt_user}")
174 | 
175 |     llm_model = os.getenv("LLM_MODEL")
176 | 
177 |     completion = client.chat.completions.create(model=llm_model,
178 |     temperature=1,
179 |     top_p=1,
180 |     frequency_penalty=0,
181 |     presence_penalty=0,
182 |     messages=[
183 |         {
184 |             "role": "system",
185 |             "content": agent_prompt_system
186 |         },
187 |         {"role": "user", "content": agent_prompt_user},
188 |     ])
189 | 
190 |     response = completion.choices[0].message.content
191 |     
192 |     # Elon doesn't like being tagged by peons
193 |     response = re.sub(r"@elonmusk", "elonmusk", response, flags=re.IGNORECASE);
194 |     
195 |     # Fix the LLMs attempts to sanitize
196 |     #repairer = ObscenityRepairer(severity='worst')
197 |     #response = repairer.repair_text(response)
198 | 
199 |     return response


--------------------------------------------------------------------------------
/code/mof-bot/src/worker_pick_foolish_content.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def pick_n_posts(n, fools_content):
 4 |     """
 5 |     Selects one random post from two distinct random fools in available_content.
 6 |     Returns a list of tuples with the selected fools and their posts.
 7 |     """
 8 |     # Ensure there is enough content to select two different fools
 9 |     if fools_content.available_content is None or len(fools_content.available_content) < n:
10 |         raise ValueError(f"Insufficient data: At least {n} fools are required")
11 | 
12 |     # Randomly pick two unique fools
13 |     fools = list(fools_content.available_content.keys())
14 |     selected_fools = np.random.choice(fools, size=n, replace=False)
15 | 
16 |     # Pick one random post from each selected fool
17 |     selected_posts = []
18 |     for fool in selected_fools:
19 |         post = np.random.choice(fools_content.available_content[fool])
20 |         selected_posts.append((fool, post))
21 | 
22 |     return selected_posts
23 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/worker_pick_lore.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import numpy as np
 4 | 
 5 | DATA_FILE = os.path.join(os.path.dirname(__file__), "../data/lore.json")
 6 | 
 7 | def load_lore_data(filepath=DATA_FILE):
 8 |     """
 9 |     Loads lore data from a specified JSON file.
10 |     Returns the data as a dictionary.
11 |     """
12 |     with open(filepath, 'r', encoding='utf-8') as file:
13 |         lore_data = json.load(file)
14 |     return lore_data
15 | 
16 | def pick_lore():
17 |     """
18 |     Selects a random topic and its content from the lore data.
19 |     Returns a dictionary with the selected topic and its content.
20 |     """
21 |     
22 |     lore_data = load_lore_data()
23 |     
24 |     # Ensure there is content to choose from
25 |     if not lore_data or len(lore_data) < 1:
26 |         raise ValueError("Insufficient data: The lore data is empty or missing")
27 | 
28 |     # Randomly pick a topic
29 |     topic = str(np.random.choice(list(lore_data.keys())))
30 | 
31 |     # Create an object with the topic and its content
32 |     selected_lore = {
33 |         "topic": topic,
34 |         "content": lore_data[topic]
35 |     }
36 | 
37 |     return selected_lore
38 | 
39 | # Example usage
40 | if __name__ == "__main__":
41 |     selected_lore = pick_lore(lore_data)
42 |     print("Selected Lore Entry:")
43 |     print(selected_lore)
44 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/worker_pick_random_effects.py:
--------------------------------------------------------------------------------
 1 | # worker_pick_random_effects.py
 2 | 
 3 | import numpy as np
 4 | from enum import Enum
 5 | 
 6 | class Effect:
 7 |     def __init__(self, usethird, tag, emojis, emotion, tone, length, lore, scramble, mistakes, thirdperson, sickjoke, freeukraine):
 8 |         """
 9 |         Initializes an Effect with the specified properties.
10 |         
11 |         Args:
12 |             emojis (bool): Whether emojis are included.
13 |             emotion (str): The selected emotion from a predefined set.
14 |             tone (str): The selected tone from a predefined set.
15 |             length (int): The "length" of the effect, based on a normal distribution.
16 |         """
17 |         self.usethird = usethird
18 |         self.tag = tag
19 |         self.emojis = emojis
20 |         self.emotion = emotion
21 |         self.tone = tone
22 |         self.length = length
23 |         self.lore = lore
24 |         self.scramble = scramble
25 |         self.mistakes = mistakes
26 |         self.thirdperson = thirdperson
27 |         self.sickjoke = sickjoke
28 |         self.freeukraine = freeukraine
29 | 
30 |     def __repr__(self):
31 |         return (f"Effect(usethird={self.usethird}, tag={self.tag}, emojis={self.emojis}, emotion='{self.emotion}', "
32 |                 f"tone='{self.tone}', length={self.length}, lore={self.lore}, "
33 |                 f"scramble={self.scramble}, mistakes={self.mistakes}, thirdperson={self.thirdperson}, "
34 |                 f"sickjoke={self.sickjoke}, self.freeukraine={self.freeukraine}")
35 | 
36 | def pick_effects():
37 |     """
38 |     Generates random effects for an object, with each property chosen randomly.
39 |     
40 |     Returns:
41 |         Effect: An Effect object with randomized properties.
42 |     """
43 |     # Define possible values for each property
44 |     emotions = ["confident", "triumphant", "enthusiastic", "prideful", "satisfied", "determined", "thrilled", "amused", "dominating", "curious", "excited", "competitive", "bold", "joyful", "motivated", "victorious", "calm", "seld-assured", "orgasmic", "exhausted", "bored", "frustrated"]
45 |     tones = ["alpha", "motivational guru", "american psycho", "investment banker", "vitalik buterin", "cryptobro", "cryptogirl", "influencer", "beastmode", "minimalist"]
46 | 
47 |     # Generate each effect attribute randomly
48 |     usethird = np.random.choice([True, False])
49 |     tag = np.random.choice([True, False], p=[0.33, 0.67])
50 |     emojis = np.random.choice([True, False], p=[0.05, 0.95])
51 |     emotion = np.random.choice(emotions)
52 |     tone = np.random.choice(tones)
53 |     length = int(np.clip(np.random.normal(40, 40), 10, 220))
54 |     lore = np.random.choice([True, False], p=[0.05, 0.95])
55 |     scramble = np.random.choice([True, False], p=[0.1, 0.9])
56 |     mistakes = np.random.choice([True, False], p=[2/3, 1/3])
57 |     thirdperson = np.random.choice([True, False], p=[0.01, 0.99])
58 |     sickjoke = np.random.choice([True, False], p=[0.01, 0.99])
59 |     freeukraine = np.random.choice([True, False], p=[0.001, 0.999])
60 |     
61 |     if lore:
62 |         length *= 4
63 | 
64 |     # Create and return an Effect object
65 |     return Effect(usethird=usethird, tag=tag, emojis=emojis, emotion=emotion, tone=tone, length=length, lore=lore, scramble=scramble, mistakes=mistakes, thirdperson=thirdperson, sickjoke=sickjoke, freeukraine=freeukraine)
66 | 


--------------------------------------------------------------------------------
/code/mof-bot/src/worker_send_tweet.py:
--------------------------------------------------------------------------------
  1 | import tweepy
  2 | from tweepy.errors import TweepyException, TooManyRequests
  3 | import os
  4 | from dotenv import load_dotenv
  5 | 
  6 | def load_env_variables():
  7 |     """Load environment variables from the .env file."""
  8 |     load_dotenv()
  9 |     access_token = os.getenv("ACCESS_TOKEN_SENDER")
 10 |     access_token_secret = os.getenv("ACCESS_TOKEN_SECRET_SENDER")
 11 |     consumer_key = os.getenv("TWITTER_API_KEY")
 12 |     consumer_secret = os.getenv("TWITTER_API_SECRET")
 13 | 
 14 |     if not all([access_token, access_token_secret, consumer_key, consumer_secret]):
 15 |         raise ValueError("One or more required Twitter API credentials are missing.")
 16 | 
 17 |     return {
 18 |         "access_token": access_token,
 19 |         "access_token_secret": access_token_secret,
 20 |         "consumer_key": consumer_key,
 21 |         "consumer_secret": consumer_secret,
 22 |     }
 23 | 
 24 | def initialize_clients():
 25 |     """Initialize Tweepy v2 Client for tweets and Tweepy v1.1 API for media uploads."""
 26 |     env_vars = load_env_variables()
 27 | 
 28 |     # v2 Client for creating tweets
 29 |     client_v2 = tweepy.Client(
 30 |         consumer_key=env_vars["consumer_key"],
 31 |         consumer_secret=env_vars["consumer_secret"],
 32 |         access_token=env_vars["access_token"],
 33 |         access_token_secret=env_vars["access_token_secret"]
 34 |     )
 35 | 
 36 |     # v1.1 API for media uploads
 37 |     api_v1 = tweepy.API(
 38 |         tweepy.OAuth1UserHandler(
 39 |             env_vars["consumer_key"],
 40 |             env_vars["consumer_secret"],
 41 |             env_vars["access_token"],
 42 |             env_vars["access_token_secret"]
 43 |         )
 44 |     )
 45 | 
 46 |     return client_v2, api_v1
 47 | 
 48 | def upload_media(api_v1, image_path):
 49 |     """Upload media using Tweepy v1.1 API and return media ID."""
 50 |     try:
 51 |         media = api_v1.media_upload(image_path)
 52 |         print(f"Image uploaded successfully. Media ID: {media.media_id}")
 53 |         return media.media_id
 54 |     except Exception as e:
 55 |         print(f"Media upload failed: {e}")
 56 |         raise
 57 | 
 58 | def send_tweet(tweet, image_path=None, log_event=None):
 59 |     """Send a tweet using the Twitter API v2, optionally attaching an image."""
 60 |     if log_event:
 61 |         log_event(f"Sending tweet: {tweet}")
 62 |     print(f"Sending tweet: {tweet}")
 63 | 
 64 |     client_v2, api_v1 = initialize_clients()
 65 | 
 66 |     try:
 67 |         media_id = None
 68 | 
 69 |         # Upload media if an image path is provided
 70 |         if image_path:
 71 |             if log_event:
 72 |                 log_event(f"Uploading image: {image_path}")
 73 |             print(f"Uploading image: {image_path}")
 74 |             media_id = upload_media(api_v1, image_path)
 75 | 
 76 |         # Send tweet with or without media
 77 |         if media_id:
 78 |             response = client_v2.create_tweet(text=tweet, media_ids=[media_id])
 79 |         else:
 80 |             response = client_v2.create_tweet(text=tweet)
 81 | 
 82 |         if response.data and 'id' in response.data:
 83 |             if log_event:
 84 |                 log_event(f"Tweet successfully sent. Tweet ID: {response.data['id']}")
 85 |             print(f"Tweet successfully sent. Tweet ID: {response.data['id']}")
 86 |         else:
 87 |             if log_event:
 88 |                 log_event("Tweet sent but response data is missing the Tweet ID.")
 89 |             print("Tweet sent but response data is missing the Tweet ID.")
 90 |     except TooManyRequests as e:
 91 |         if log_event:
 92 |             log_event(f"Rate limit error: {e}")
 93 |         raise  # Re-raise to allow the caller to handle it
 94 |     except TweepyException as e:
 95 |         if log_event:
 96 |             log_event(f"Failed to send tweet: {e}")
 97 |         raise  # Re-raise to allow the caller to handle it
 98 |     except Exception as e:
 99 |         if log_event:
100 |             log_event(f"Unexpected error: {e}")
101 |         print(f"Unexpected error: {e}")
102 |         raise  # Re-raise to allow the caller to handle it


--------------------------------------------------------------------------------
/code/mof-bot/tests/test_agent.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src')))
 4 | 
 5 | import unittest
 6 | from unittest.mock import patch
 7 | import signal
 8 | import agent  # Assuming agent.py is the name of your module
 9 | 
10 | class TestAgent(unittest.TestCase):
11 |     def setUp(self):
12 |         # Announce the name of each test before it runs
13 |         print(f"\n[Running {self._testMethodName}...]")
14 | 
15 |     def test_signal_handler_sets_running_to_false(self):
16 |         # Arrange
17 |         agent.running = True
18 |         
19 |         # Act
20 |         agent.signal_handler(signal.SIGINT, None)
21 |         
22 |         # Assert
23 |         self.assertFalse(agent.running)
24 | 
25 |     def test_execute_runs_without_errors(self):
26 |         try:
27 |             agent.execute()
28 |         except Exception as e:
29 |             self.fail(f"execute() raised an exception {e}")
30 | 
31 |     @patch("time.sleep", return_value=None)  # Mock sleep for fast test execution
32 |     def test_tick_loop_stops_on_interrupt(self, mock_sleep):
33 |         # Arrange
34 |         agent.running = True
35 |         
36 |         # Act
37 |         try:
38 |             agent.running = False  # Directly set running to False to simulate stop after first loop
39 |             agent.tick()
40 |         except Exception as e:
41 |             self.fail(f"tick() raised an exception {e}")
42 |         
43 |         # Assert
44 |         self.assertFalse(agent.running)  # Check that loop stops gracefully
45 | 
46 |     @patch("time.sleep", return_value=None)  # Mock sleep to speed up test
47 |     @patch("time.time")
48 |     def test_tick_respects_TICK_interval(self, mock_time, mock_sleep):
49 |         MAX_TEST_TICKS = 4
50 |         
51 |         # Arrange
52 |         agent.TICK = 1000  # 1 second
53 |         agent.running = True
54 |         
55 |         # Set up a limited side effect for `time.time()` calls to control the loop;
56 |         # the main loop calls the time function twice (time_start, time_elapsed)
57 |         # so we need 2 * MAX_TEST_TICKS values representing timestamps
58 |         mock_time.side_effect = [
59 |             1698654321, 1698654321.1,    # Trial 1: 0.1 second elapsed
60 |             1698654321.1, 1698654321.6,  # Trial 2: 0.5 seconds elapsed
61 |             1698654321.6, 1698654322.6,  # Trial 3: 1 second elapsed
62 |             1698654322.6, 1698654323.6   # Trial 4: 2 seconds elapsed
63 |         ]
64 | 
65 |         # Count the number of times execute is called to limit iterations
66 |         call_count = 0
67 |         def limited_execute():
68 |             nonlocal call_count
69 |             call_count += 1
70 |             if call_count >= MAX_TEST_TICKS:  # Stop after 4 executions
71 |                 agent.running = False
72 | 
73 |         # Act
74 |         with patch("agent.execute", side_effect=limited_execute) as mock_execute:
75 |             agent.tick()
76 | 
77 |         # Assert
78 |         self.assertEqual(mock_execute.call_count, 4)
79 |         self.assertEqual(mock_sleep.call_count, 4)
80 | if __name__ == "__main__":
81 |     unittest.main()
82 | 


--------------------------------------------------------------------------------
/code/mof-bot/tests/test_fools_content.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from unittest.mock import patch, mock_open
 3 | import fools_content
 4 | 
 5 | class TestFoolsContent(unittest.TestCase):
 6 |     
 7 |     @patch("fools_content.open", new_callable=mock_open, read_data='{"@handle_1": ["content_1", "content_2", "content_3"], "@handle_2": ["content_1", "content_2", "content_3"]}')
 8 |     def test_load_available_content_success(self, mock_file):
 9 |         # Test that available_content loads correctly from the sample JSON
10 |         fools_content.load_available_content()
11 |         
12 |         # Check that available_content matches the sample data structure
13 |         expected_content = {
14 |             "@handle_1": ["content_1", "content_2", "content_3"],
15 |             "@handle_2": ["content_1", "content_2", "content_3"]
16 |         }
17 |         self.assertEqual(fools_content.available_content, expected_content)
18 |         
19 |     @patch("fools_content.open", new_callable=mock_open, read_data='{"@handle_1": ["content_1", "content_2", "content_3"], "@handle_2": ["content_1", "content_2", "content_3"]}')
20 |     def test_summarize_correct_counts(self, mock_file):
21 |         # Load the sample data and run summarize to set num_fools and num_posts_per_fool
22 |         fools_content.load_available_content()
23 |         fools_content.summarize()
24 |         
25 |         # Check that num_fools matches the number of top-level keys
26 |         self.assertEqual(fools_content.num_fools, 2)
27 |         
28 |         # Check that num_posts_per_fool contains the correct counts of posts per handle
29 |         self.assertEqual(fools_content.num_posts_per_fool, [3, 3])
30 | 
31 |     @patch("fools_content.open", side_effect=FileNotFoundError)
32 |     def test_load_available_content_file_not_found(self, mock_file):
33 |         # Test that available_content is empty when the file is not found
34 |         fools_content.load_available_content()
35 |         
36 |         # Check that available_content is set to an empty dictionary
37 |         self.assertEqual(fools_content.available_content, {})
38 | 
39 |     @patch("fools_content.open", new_callable=mock_open, read_data='{invalid_json}')
40 |     def test_load_available_content_json_decode_error(self, mock_file):
41 |         # Test that available_content is empty when JSON is invalid
42 |         fools_content.load_available_content()
43 |         
44 |         # Check that available_content is set to an empty dictionary
45 |         self.assertEqual(fools_content.available_content, {})
46 | 
47 | if __name__ == "__main__":
48 |     unittest.main()
49 | 


--------------------------------------------------------------------------------