├── .flake8 ├── marsha ├── .flake8 ├── __main__.py ├── requirements.txt ├── __init__.py ├── Makefile ├── mappers │ ├── base.py │ └── chatgpt.py ├── marsha.spec ├── utils.py ├── .time.py ├── stats.py ├── parse.py ├── base.py ├── helper.py ├── meta.py └── llm.py ├── examples ├── ocr │ ├── dummy.pdf │ └── tesseract.mrsh ├── images │ └── duckduckgo-terminal.gif ├── data-oriented │ ├── notebook │ │ ├── employees_by_department.csv │ │ ├── department_skills.csv │ │ └── data-mangling.ipynb │ ├── department_skills.csv │ ├── employees_by_department.csv │ ├── void_func_viz.mrsh │ ├── data_mangling.mrsh │ ├── data_mangling_complex.mrsh │ └── data_mangling_csv.mrsh ├── marsha-misc │ ├── types_only.mrsh │ ├── two_fns_file.mrsh │ ├── fn_reference.mrsh │ └── three_fns_file.mrsh ├── general-purpose │ ├── fibonacci.mrsh │ ├── lol.mrsh │ ├── now.mrsh │ ├── extract_args.mrsh │ ├── top_n_words.mrsh │ ├── roman_numerals.mrsh │ ├── sort_by_age.mrsh │ ├── external_api.mrsh │ ├── extract_connection_info.mrsh │ └── sort_modules.mrsh ├── sql │ └── query-builder.mrsh ├── web │ ├── city_to_h3.mrsh │ ├── duckduckgo.mrsh │ ├── weather.mrsh │ └── cnn.mrsh └── apis │ └── todos.mrsh ├── .gitignore ├── setup.py ├── .github └── workflows │ ├── lint.yaml │ ├── time_all.yaml │ ├── windows_run.yaml │ └── time.yaml ├── LICENSE ├── rfcs ├── 000 - RFC Template.md ├── 001 - Marsha Syntax.md └── 002 - Compiler Refactor RFC.md └── README.md /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | extend-ignore=E501 -------------------------------------------------------------------------------- /marsha/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | extend-ignore: E501 -------------------------------------------------------------------------------- /examples/ocr/dummy.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alantech/marsha/HEAD/examples/ocr/dummy.pdf -------------------------------------------------------------------------------- /marsha/__main__.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from marsha.base import main 4 | 5 | # Entry point 6 | asyncio.run(main()) 7 | -------------------------------------------------------------------------------- /examples/images/duckduckgo-terminal.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alantech/marsha/HEAD/examples/images/duckduckgo-terminal.gif -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | build/ 3 | dist/ 4 | venv/ 5 | marsha.egg-info/ 6 | .env 7 | examples/**/*.py 8 | examples/**/requirements.txt 9 | *.ipynb* -------------------------------------------------------------------------------- /marsha/requirements.txt: -------------------------------------------------------------------------------- 1 | autopep8 2 | flake8 3 | mccabe 4 | mistletoe 5 | openai 6 | pycodestyle 7 | pydocstyle 8 | pyflakes 9 | pyinstaller 10 | pylama -------------------------------------------------------------------------------- /examples/data-oriented/notebook/employees_by_department.csv: -------------------------------------------------------------------------------- 1 | name, department 2 | Bob, Accounting 3 | Jake, Engineering 4 | Lisa, Engineering 5 | Michael, HR 6 | Sue, HR -------------------------------------------------------------------------------- /examples/data-oriented/department_skills.csv: -------------------------------------------------------------------------------- 1 | department, skill 2 | Accounting, math 3 | Accounting, spreadsheets 4 | Engineering, coding 5 | Engineering, linux 6 | HR, spreadsheets 7 | HR, 
organization -------------------------------------------------------------------------------- /examples/data-oriented/notebook/department_skills.csv: -------------------------------------------------------------------------------- 1 | department, skill 2 | Accounting, math 3 | Accounting, spreadsheets 4 | Engineering, coding 5 | Engineering, linux 6 | HR, spreadsheets 7 | HR, organization -------------------------------------------------------------------------------- /marsha/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | marsha. 3 | 4 | Marsha is a higher-level programming language. 5 | """ 6 | 7 | __version__ = "0.0.1" 8 | __author__ = 'Alan Technologies Maintainers' 9 | __credits__ = 'Alan Technologies' 10 | -------------------------------------------------------------------------------- /examples/data-oriented/employees_by_department.csv: -------------------------------------------------------------------------------- 1 | id, name, department, start_date 2 | 1, Bob, Accounting, 8/8/2003 3 | 1248, Jake, Engineering, 4/4/2013 4 | 14345, Lisa, Engineering, 0/0/0 5 | 98477, Michael, HR, 5/5/2023 6 | 12, Sue, HR, 1/1/2020 -------------------------------------------------------------------------------- /examples/marsha-misc/types_only.mrsh: -------------------------------------------------------------------------------- 1 | # type SKU 2 | brand_id, sku_id, color, type 3 | 20, 10040, 'red', 'shirt' 4 | 50, 10059, 'blue', 'shirt' 5 | 6 | # type person 7 | name, age 8 | Joe, 20 9 | Jane, 50 10 | Felix, 10 11 | Alex, 60 12 | -------------------------------------------------------------------------------- /examples/general-purpose/fibonacci.mrsh: -------------------------------------------------------------------------------- 1 | # func fibonacci(integer): integer in the set of fibonacci numbers 2 | 3 | This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 
4 | 5 | fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 6 | 7 | * fibonacci(1) = 1 8 | * fibonacci(2) = 1 9 | * fibonacci(3) = 2 10 | * fibonacci(0) throws an error 11 | -------------------------------------------------------------------------------- /examples/general-purpose/lol.mrsh: -------------------------------------------------------------------------------- 1 | 2 | 3 | # func facebook(user request) facebook page 4 | 5 | Make facebook for me 6 | 7 | * facebook('home') = My facebook homepage 8 | * facebook('my friend') = My friend's facebook page -------------------------------------------------------------------------------- /examples/general-purpose/now.mrsh: -------------------------------------------------------------------------------- 1 | # func now(): the current date and time 2 | 3 | This function prints the current date and time in an english format. It includes the day of the week, month, day, and year for the date. For the time it incldues the hour, minute, AM/PM, and timezone. 4 | 5 | * now() = 'Today is Wednesday, August 2nd, 2023. It is currently 5:19PM CDT' 6 | * now() = 'Today is Monday, July 31st, 2023. It is currently 8:37AM CDT' 7 | * now() = 'Today is Sunday, January 20th, 2019. It is currently 3:14PM PST' -------------------------------------------------------------------------------- /examples/sql/query-builder.mrsh: -------------------------------------------------------------------------------- 1 | 5 | 6 | # func get_most_populated_cities(): postgres sql query generated 7 | 8 | This function generates a postgresql query to retrieve all columns for the TOP 5 cities from the 'city' table with the highest 'population'. 
9 | 10 | * get_most_populated_cities() = 'SELECT * FROM table WHERE condition LIMIT 5;' 11 | * get_most_populated_cities() = 'SELECT * FROM table LIMIT 5;' 12 | -------------------------------------------------------------------------------- /examples/data-oriented/void_func_viz.mrsh: -------------------------------------------------------------------------------- 1 | # type EmployeesByDepartment employees_by_department.csv 2 | 3 | # type DepartmentSkills department_skills.csv 4 | 5 | # type EmployeeSkills 6 | name, skill 7 | Bob, math 8 | Jake, spreadsheets 9 | Lisa, coding 10 | Sue, spreadsheets 11 | 12 | # func visualize_employee_skills(list of EmployeesByDepartment, list of DepartmentSkills) 13 | 14 | This function receives a list of EmployeesByDepartment and a list of DepartmentSkills to create and plot an EmployeeSkills by merging the 2 list by department and plotting it as a pie chart. Use the pandas and matplotlib library. -------------------------------------------------------------------------------- /examples/general-purpose/extract_args.mrsh: -------------------------------------------------------------------------------- 1 | # func extract_args(string): list of arguments 2 | 3 | This function extracts all arguments from a given function call string. It returns a list of arguments. It should be able to extract also the arguments from nested functions. 
4 | 5 | * extract_args('fn()') = [] 6 | * extract_args('sum(a, b)') = ['a', 'b'] 7 | * extract_args('sum(a, sub(b, 0))') = ['a', 'b', 0] 8 | * extract_args('sum(a, sub(b, id(c)))') = ['a', 'b', 'c'] 9 | * extract_args('') throws a not a function error 10 | * extract_args('a') throws a not a function error 11 | * extract_args('what ever string') throws a not a function error 12 | * extract_args(3) throws an error for invalid type 13 | -------------------------------------------------------------------------------- /examples/web/city_to_h3.mrsh: -------------------------------------------------------------------------------- 1 | # func city_to_h3(string of location to get the H3 index, resolution of the H3 index): H3 index as a string 2 | 3 | This function uses the 'h3' python library, as documented on https://h3geo.org specifically the 'geo_to_h3' function. 4 | 5 | For the location, the public.opendatasoft.com API should be used to query a city, for example, a query for "Austin, TX" looks like: https://public.opendatasoft.com/api/records/1.0/search/?dataset=geonames-all-cities-with-a-population-500&q=Austin%2C%20TX&facet=timezone&facet=country and then converts it into latitude and longitude, then uses the 'h3' 'geo_to_h3' function and the specified resolution to produce the H3 index 6 | 7 | * h3('Austin, TX', 9) = '894898d92abffff' 8 | * h3('San Francisco, CA', 200) raises an exception 9 | * h3('teh MOON', 9) raises an exception -------------------------------------------------------------------------------- /examples/general-purpose/top_n_words.mrsh: -------------------------------------------------------------------------------- 1 | # func top_n_words(filename, integer n): list of n tuples of words and counts, ordered from greatest to least 2 | 3 | This function should read the text file from the specified filename, and then determine the top 'n' most common words, making sure to convert all words to lower case and ignoring all numbers, punctuation, etc. 
4 | 5 | It then generates a list of tuples. The first element of each tuple is the word as a string, the second element is the integer count of frequency of said word. This list should be ordered from greatest to least count. 6 | 7 | * top_n_words('/etc/hosts', 1) == [('localhost', 2)] 8 | * top_n_words('./top_n_words.mrsh', 2) == [('of', 6), ('to', 5)] 9 | * top_n_words('./top_n_words.mrsh, 0) == [] 10 | * top_n_words('./top_n_words.mrsh, -1) raises an exception 11 | * top_n_words('./nonexistent/file', 5) raises an exception -------------------------------------------------------------------------------- /marsha/Makefile: -------------------------------------------------------------------------------- 1 | ./dist/marsha: ./venv ./*.py ./marsha.spec 2 | . ./venv/bin/activate; pip install -r requirements.txt 3 | . ./venv/bin/activate; pyinstaller __main__.py --name marsha --onefile --collect-all pyflakes --collect-all mccabe --collect-all pycodestyle --collect-all pydocstyle --collect-all pylama --add-data ../examples:./examples --add-data ./helper.py:./marsha 4 | 5 | ./venv: 6 | (command -v $(python) && $(python) -m venv venv) || (command -v python && python -m venv venv) || (command -v python3 && python3 -m venv venv) 7 | 8 | .PHONY: clean 9 | clean: 10 | git clean -ffdx -e .env 11 | 12 | .PHONY: install 13 | install: ./dist/marsha 14 | cp ./dist/marsha /usr/local/bin/marsha 15 | 16 | .PHONY: format 17 | format: 18 | . ./venv/bin/activate; autopep8 -i *.py 19 | 20 | .PHONY: time 21 | time: ./dist/marsha .time.py 22 | . 
./venv/bin/activate; pip install --upgrade ..; ./.time.py $(test) $(attempts) $(n_parallel_executions) $(stats) 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='marsha', 5 | version='0.0.1', 6 | description='Marsha is a higher-level programming language.', 7 | url='https://github.com/alantech/marsha', 8 | author='Alan Technologies Maintainers', 9 | author_email='hello@alantechnologies.com', 10 | license='MIT', 11 | packages=['marsha'], 12 | install_requires=[ 13 | 'autopep8', 14 | 'flake8', 15 | 'mccabe', 16 | 'mistletoe', 17 | 'openai', 18 | 'pycodestyle', 19 | 'pydocstyle', 20 | 'pyflakes', 21 | 'pyinstaller', 22 | 'pylama' 23 | ], 24 | classifiers=[ 25 | 'Development Status :: 2 - Pre-Alpha', 26 | 'Intended Audience :: Developers' 27 | 'License :: OSI Approved :: MIT License', 28 | 'Operating System :: POSIX :: Linux', 29 | 'Programming Language :: Python :: 3.10', 30 | ], 31 | ) 32 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint project 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | 7 | jobs: 8 | run: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | with: 13 | ref: ${{ github.event.client_payload.ref }} 14 | - name: Set up Python 3.10 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: "3.10" 18 | - name: Lint project 19 | run: | 20 | cd marsha 21 | python -m venv venv 22 | source venv/bin/activate 23 | pip install -r requirements.txt 24 | PEP8_LINT_RESULTS=`autopep8 -d *.py` 25 | if [ "$PEP8_LINT_RESULTS" != '' ]; then 26 | echo $PEP8_LINT_RESULTS 27 | exit 1 28 | fi 29 | FLAKE8_LINT_RESULTS=`flake8 *.py` 30 | if [ "$FLAKE8_LINT_RESULTS" != '' ]; then 31 | echo 
$FLAKE8_LINT_RESULTS 32 | exit 1 33 | fi 34 | -------------------------------------------------------------------------------- /marsha/mappers/base.py: -------------------------------------------------------------------------------- 1 | class BaseMapper(): 2 | """Semi-abstract base for 'mappers' in Marsha""" 3 | 4 | def __init__(self): 5 | self.check_retries = 3 6 | self.output = None 7 | 8 | async def transform(self, i): 9 | raise Exception('Not implemented') 10 | 11 | async def check(self): 12 | # Define a check if you want, but not necessary 13 | return self.output 14 | 15 | async def run(self, i): 16 | try: 17 | self.output = await self.transform(i) 18 | except Exception as e: 19 | # TODO: Log the error before re-raise? 20 | raise e 21 | 22 | iters = self.check_retries 23 | while iters > 0: 24 | try: 25 | o = await self.check() 26 | return o 27 | except Exception: 28 | # Using the exception here as flow control 29 | iters = iters - 1 30 | 31 | raise Exception('Transformer failed to converge') 32 | -------------------------------------------------------------------------------- /examples/general-purpose/roman_numerals.mrsh: -------------------------------------------------------------------------------- 1 | # func roman_to_int(string of roman numerals): integer 2 | 3 | Parse a string as roman numerals and determine the integer it represents. 4 | 5 | In Roman Numerals, I = 1, V = 5, X = 10, L = 50, C = 100, D = 500, and M = 1000. 6 | 7 | Sequences of the same digit next to each other become additions. Eg II = 2, III = 3, XX = 20, etc. 8 | 9 | If a lower value symbol comes before a higher value symbol, it is subtracted from the higher value symbol. Eg IV = 4, IX = 9, etc. 10 | 11 | If a lower value symbol comes after a higher value symbol, it is added to the higher value symbol. Eg, VII = 7, DC = 600, etc. 12 | 13 | There are multiple representations of the same numeric value possible, eg IIII = IV, VII = IIIX. 
Most prefer to use the shortest representation, though. 14 | 15 | * roman_to_int('III') = 3 16 | * roman_to_int('IV') = 4 17 | * roman_to_int('IIII') = 4 18 | * roman_to_int('MMXXIII') = 2023 19 | * roman_to_int('MDCXXXII') = 1632 20 | * roman_to_int('twenty twenty three') = NaN -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Alan Technologies, Inc 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /examples/general-purpose/sort_by_age.mrsh: -------------------------------------------------------------------------------- 1 | # type person 2 | name, age 3 | Joe, 20 4 | Jane, 50 5 | Felix, 10 6 | Alex, 60 7 | 8 | # func sort_by_age(person list, ascending boolean flag): person list ordered by age 9 | 10 | This function receives a list of `person` objects and return them ordered by age ascending or descending depending on the boolean flag. The default value for the ascending flag is true. 11 | 12 | * sort_by_age([person('Joe', 20)]) = [person('Joe', 20)] 13 | * sort_by_age([person('Joe', 20)], false) = [person('Joe', 20)] 14 | * sort_by_age([person('Joe', 20), person('Jane', 50), person('Felix', 10), person('Alex', 60)]) = [person('Felix', 10), person('Joe', 20), person('Jane', 50), person('Alex', 60)] 15 | * sort_by_age([person('Joe', 20), person('Jane', 50), person('Felix', 10), person('Alex', 60)], true) = [person('Felix', 10), person('Joe', 20), person('Jane', 50), person('Alex', 60)] 16 | * sort_by_age([person('Joe', 20), person('Jane', 50), person('Felix', 10), person('Alex', 60)], false) = [person('Alex', 60), person('Jane', 50), person('Joe', 20), person('Felix', 10)] 17 | * sort_by_age([]) = [] 18 | * sort_by_age() = throws a no list received error 19 | -------------------------------------------------------------------------------- /.github/workflows/time_all.yaml: -------------------------------------------------------------------------------- 1 | name: Run all time scripts 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | attempts: 7 | description: "Number of attempts to run the test" 8 | required: false 9 | default: 1 10 | parallel_runs: 11 | description: "Number of parallel runs" 12 | required: false 13 | default: 3 14 | 15 | jobs: 16 | time_all: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Checkout code 20 | uses: actions/checkout@v2 21 | 22 | - name: Call 
'time' job for each test file 23 | env: 24 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 25 | GITHUB_REPOSITORY: ${{ github.repository }} 26 | 27 | run: | 28 | for file in ./examples/**/*.mrsh; do 29 | [ -e "$file" ] || continue 30 | echo "Calling 'time' job with 'test' input: .$file" 31 | curl -X POST -H "Authorization: token $GITHUB_TOKEN" -H "Accept: application/json" \ 32 | "https://api.github.com/repos/$GITHUB_REPOSITORY/dispatches" \ 33 | -d '{"event_type": "time", "client_payload": {"test": "'".$file"'", "parallel_runs": ${{github.event.inputs.parallel_runs}}, "attempts": ${{github.event.inputs.attempts}}, "ref": "${{ github.ref_name }}"}' 34 | done -------------------------------------------------------------------------------- /examples/web/duckduckgo.mrsh: -------------------------------------------------------------------------------- 1 | # func duckduckgo(search text): top three link names and URLs separated by newline 2 | 3 | This function executes a search using duckduckgo.com and returns the top three links (excluding ads) in the following format: 4 | 5 | First link name: https://first.link/path 6 | Second link name: https://www.secondlink.com/path 7 | Third link name: https://thirdlink.org/path 8 | 9 | * duckduckgo('search engine') = '21 Great Search Engines You Can Use Instead Of Google: https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&cad=rja&uact=8&ved=2ahUKEwivx6SS1qeAAxU5lWoFHbuqA4sQFnoECBEQAQ&url=https%3A%2F%2Fwww.searchenginejournal.com%2Falternative-search-engines%2F271409%2F&usg=AOvVaw1MhHGUxrHf8AkmiU64AotH&opi=89978449\nThe Top 11 Search Engines, Ranked by Popularity: https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&cad=rja&uact=8&ved=2ahUKEwivx6SS1qeAAxU5lWoFHbuqA4sQFnoECA8QAQ&url=https%3A%2F%2Fblog.hubspot.com%2Fmarketing%2Ftop-search-engines&usg=AOvVaw30ykZ9Ftz51L4pQTaMsmpQ&opi=89978449\nSearch engine - Wikipedia: 
https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=&cad=rja&uact=8&ved=2ahUKEwivx6SS1qeAAxU5lWoFHbuqA4sQFnoECCsQAQ&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSearch_engine&usg=AOvVaw2JG-HuD9odcoxnHHkUd3sl&opi=89978449' 10 | * duckduckgo(3) raises an exception -------------------------------------------------------------------------------- /examples/web/weather.mrsh: -------------------------------------------------------------------------------- 1 | # func weather(string of location to perform the weather report): newline delimited string report of current temperature, precipitation, and wind 2 | 3 | This function uses the Norwegian Meteorological Institute API (https://api.met.no) to get relevant weather information for the specified location and returns a newline-delimited string of the weather, including temperature, precipitation, and wind. The units are whichever is customary for the location in question. 4 | 5 | api.met.no requires a user agent be specified on queries or it will reject them. 6 | 7 | This API only accepts latitude (lat) and longitude (lon) parameters, so the public.opendatasoft.com API must be used in conjunction. 
For example, a query for "Austin, TX" looks like: https://public.opendatasoft.com/api/records/1.0/search/?dataset=geonames-all-cities-with-a-population-500&q=Austin%2C%20TX&facet=timezone&facet=country 8 | 9 | WHen done, the output should look like the following: 10 | 11 | Weather Report for Seattle, WA 12 | 13 | 63F Wind SSE 2MPH Precipitation 0" 14 | 15 | * weather('San Jose, CA') = 'Weather Report for San Jose, CA\n\n77F Wind N 1MPH Precipitation 0"' 16 | * weather('Beograd, Srbija') = 'Weather Report for Beograd, Srbija\n\n20C Wind SE 3km/h Precipitation 0ml' 17 | * weather('teh MOON') raises an exception -------------------------------------------------------------------------------- /examples/general-purpose/external_api.mrsh: -------------------------------------------------------------------------------- 1 | # func get_mrr(stripe API key, period): MRR 2 | 3 | Use stripe API. 4 | 5 | Calculate net monthly recurring revenue (MRR) for the period. 6 | 7 | To calculate MMR multiply the total number of paying customers by the average revenue per user per month. 8 | 9 | * get_mrr() = throws an error 10 | * get_mrr('') = throws an error 11 | * get_mrr('', 'not a date') = throws an error 12 | * get_mrr('pk_test_Dt4ZBItXSZT1EzmOd8yCxonL', 'Jan 2023') = 0.03 13 | * get_mrr('pk_test_Dt4ZBItXSZT1EzmOd8yCxonL', 'Jan 2021') = 0.35 14 | * get_mrr('pk_test_Dt4ZBItXSZT1EzmOd8yCxonL', 'Jun 2023') = 0.1 15 | 16 | 17 | # func get_mrr_growth_rate(stripe API key, start period, end period): MRR Growth rate (%) 18 | 19 | Use stripe API. 20 | 21 | Calculate monthly recurring revenue (MRR) for each period using `get_mrr`. 
22 | 23 | Calculate MMR growth rate using the formula `MRR Growth Rate (%) = MRR Month B – MRR Month A / MRR Month A × 100` 24 | 25 | * get_revenue_growth() = throws an error 26 | * get_revenue_growth('') = throws an error 27 | * get_revenue_growth('', 'not a date', 'not a date') = throws an error 28 | * get_revenue_growth('pk_test_Dt4ZBItXSZT1EzmOd8yCxonL', 'Jan 2023', 'Mar 2023') = '3%' 29 | * get_revenue_growth('pk_test_Dt4ZBItXSZT1EzmOd8yCxonL', 'Jan 2021', 'Jan 2022') = '35%' 30 | * get_revenue_growth('pk_test_Dt4ZBItXSZT1EzmOd8yCxonL', 'Jan 2023', 'Jun 2023') = '10%' 31 | -------------------------------------------------------------------------------- /examples/web/cnn.mrsh: -------------------------------------------------------------------------------- 1 | # func cnn(string of section to take headlines from): list of headlines 2 | 3 | This function scrapes the cnn.com website for headlines. The section it takes the headlines from is passed to it, with 'home' referring to the homepage at cnn.com and should be special cased, while 'us' refers to cnn.com/us, 'politics' refers to cnn.com/politics, and so on for every top-level category CNN has. 4 | 5 | Headlines on cnn.com have the class 'container__headline', no other class name correctly identifies them as the website has changed. 6 | 7 | * cnn('home') = ["Florida's new standards for teaching Black history spark outrage", "His books sold over 300 million copies and were translated into 63 languages. 
Now, a museum is acknowledging his racism", "Player quits match in tears as tennis world slams opponent’s ‘absolutely disgusting’ actions"] 8 | * cnn('us') = ['18-year-old Miami woman arrested after allegedly trying to hire a hitman to go after her 3-year-old son', 'Investigation into Gilgo Beach serial killings suspect expands to Nevada and South Carolina', 'Rescue crews continue search for 2 children swept away by Pennsylvania floodwater that killed their mother'] 9 | * cnn('world') = ["Police raids follow shocking video of sexual assault in India’s Manipur state amid ethnic violence", 'Ukrainian air defenses in Odesa outgunned as Russia targets global grain supply', 'Anger boils over as Kenya’s cost of living protests shake the nation'] -------------------------------------------------------------------------------- /examples/general-purpose/extract_connection_info.mrsh: -------------------------------------------------------------------------------- 1 | # func extract_connection_info(database url): JSON object with connection properties 2 | 3 | the function extracts all the connection properties in a JSON format from the database url provided. The function should also make sure that the db url follows the sql alchemy definition of database url. The function should support all database schemes supported by sql alchemy. 
4 | 5 | * extract_connection_info('postgresql://user:pass@0.0.0.0:5432/mydb') = { "protocol": "postgresql", "dbUser": "user", "dbPassword": "pass", "host": "0.0.0.0", "port": 5432, "database": "mydb" } 6 | * extract_connection_info('postgresql://user:pass@0.0.0.0:5432/mydb?sslmode=require') = { "protocol": "postgresql", "dbUser": "user", "dbPassword": "pass", "host": "0.0.0.0", "port": 5432, "database": "mydb", "extra": { "sslmode": "require" } } 7 | * extract_connection_info('mysql://user:pass0.0.0.0:3306/mydb?') = { "protocol": "mysql", "dbUser": "user", "dbPassword": "pass", "host": "0.0.0.0", "port": 3306, "database": "mydb" } 8 | * extract_connection_info('mysql://0.0.0.0:3306/mydb?ssl_check_hostname=false) = { "protocol": "mysql", "dbUser": "user", "dbPassword": "pass", "host": "0.0.0.0", "port": 3306, "database": "mydb", "extra": { "ssl_check_hostname": false } } 9 | * extract_connection_info('jdbc:oracle://user:pass0.0.0.0:1521/mydb?') = throws an error due to invalid db url 10 | * extract_connection_info('') = {} 11 | * extract_connection_info() = throws an error -------------------------------------------------------------------------------- /examples/marsha-misc/two_fns_file.mrsh: -------------------------------------------------------------------------------- 1 | # func fibonacci(integer): integer in the set of fibonacci numbers 2 | 3 | This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 4 | 5 | fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 6 | 7 | * fibonacci(1) = 1 8 | * fibonacci(2) = 1 9 | * fibonacci(3) = 2 10 | * fibonacci(0) throws an error 11 | 12 | # type person 13 | name, age 14 | Joe, 20 15 | Jane, 50 16 | Felix, 10 17 | Alex, 60 18 | 19 | # func sort_by_age(person list, ascending boolean flag): person list ordered by age 20 | 21 | This function receives a list of `person` objects and return them ordered by age ascending or descending depending on the boolean flag. 
The default value for the ascending flag is true. 22 | 23 | * sort_by_age([person('Joe', 20)]) = [person('Joe', 20)] 24 | * sort_by_age([person('Joe', 20)], false) = [person('Joe', 20)] 25 | * sort_by_age([person('Joe', 20), person('Jane', 50), person('Felix', 10), person('Alex', 60)]) = [person('Felix', 10), person('Joe', 20), person('Jane', 50), person('Alex', 60)] 26 | * sort_by_age([person('Joe', 20), person('Jane', 50), person('Felix', 10), person('Alex', 60)], true) = [person('Felix', 10), person('Joe', 20), person('Jane', 50), person('Alex', 60)] 27 | * sort_by_age([person('Joe', 20), person('Jane', 50), person('Felix', 10), person('Alex', 60)], false) = [person('Alex', 60), person('Jane', 50), person('Joe', 20), person('Felix', 10)] 28 | * sort_by_age([]) = [] 29 | * sort_by_age() = throws a no list received error 30 | -------------------------------------------------------------------------------- /examples/ocr/tesseract.mrsh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # func extract_info(pdf file path): extracted info as dict 5 | 6 | This function will receive a pdf file path and will use tesseract OCR to extract structured data from the file. 7 | 8 | The pdf document has a tabular format, meaning the labels we will look for are headers and the content is in the same column in the next row or rows. 9 | 10 | We want to extract the following content from a table format where columns can be distinguished by spaces: 11 | 12 | `invoice_no` looking for the "INVOICE NO." header. The information will be in the next row. 13 | `customer_id` looking for the "CUSTOMER ID" header. The information will be in the next row. 14 | 15 | We want to extract the following content from a table format where columns can be distinguished by space between headers: 16 | 17 | `bill_to` looking for the "BILL TO" header. The information will be in the next 5 rows. 18 | `ship_to` looking for the "SHIP TO" header. 
The information will be in the next 5 rows. 19 | 20 | We want to extract the following label: 21 | 22 | `total` looking for "TOTAL" (all uppercase). The information will be to the right, not in the next row. 23 | 24 | 25 | * extract_info() = throws an error 26 | * extract_info('./pathA') = { 'bill_to': '', 'ship_to': '', 'invoice_no': '', 'customer_id': '123', 'total': '345' } 27 | -------------------------------------------------------------------------------- /examples/data-oriented/data_mangling.mrsh: -------------------------------------------------------------------------------- 1 | # type EmployeesByDepartment employees_by_department.csv 2 | 3 | # type DepartmentSkills department_skills.csv 4 | 5 | # type EmployeeSkills 6 | name, skill 7 | Bob, math 8 | Jake, spreadsheets 9 | Lisa, coding 10 | Sue, spreadsheets 11 | 12 | # func get_employee_skills(list of EmployeesByDepartment, list of DepartmentSkills): list of EmployeeSkills 13 | 14 | This function receives a list of EmployeesByDepartment and a list of DepartmentSkills. The function should be able to create a response of EmployeeSkills merging the 2 list by department. Use the pandas library. 
15 | 16 | * get_employee_skills() = throws an error 17 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')]) = throws an error 18 | * get_employee_skills([], []) = [] 19 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')], []) = [] 20 | * get_employee_skills([], [DepartmentSkills('Accounting', 'math')]) = [] 21 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')], [DepartmentSkills('Accounting', 'math')]) = [EmployeeSkills('Joe', 'math')] 22 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting'), EmployeesByDepartment('Jake', 'Engineering')], [DepartmentSkills('Accounting', 'math')]) = [EmployeeSkills('Joe', 'math')] 23 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting'), EmployeesByDepartment('Jake', 'Engineering')], [DepartmentSkills('Accounting', 'math'), DepartmentSkills('Engineering', 'coding')]) = [EmployeeSkills('Joe', 'math'), EmployeeSkills('Jake', 'coding')] -------------------------------------------------------------------------------- /examples/marsha-misc/fn_reference.mrsh: -------------------------------------------------------------------------------- 1 | # type student 2 | name, age 3 | Joe, 20 4 | Jane, 50 5 | Felix, 10 6 | Alex, 60 7 | 8 | # func sort_by_age(student list, ascending boolean flag): student list ordered by age 9 | 10 | This function receives a list of `student` objects and return them ordered by age ascending or descending depending on the boolean flag. The default value for the ascending flag is true. 
11 | 12 | * sort_by_age([student('Joe', 20)]) = [student('Joe', 20)] 13 | * sort_by_age([student('Joe', 20)], false) = [student('Joe', 20)] 14 | * sort_by_age([student('Joe', 20), student('Jane', 50), student('Felix', 10), student('Alex', 60)]) = [student('Felix', 10), student('Joe', 20), student('Jane', 50), student('Alex', 60)] 15 | * sort_by_age([student('Joe', 20), student('Jane', 50), student('Felix', 10), student('Alex', 60)], true) = [student('Felix', 10), student('Joe', 20), student('Jane', 50), student('Alex', 60)] 16 | * sort_by_age([student('Joe', 20), student('Jane', 50), student('Felix', 10), student('Alex', 60)], false) = [student('Alex', 60), student('Jane', 50), student('Joe', 20), student('Felix', 10)] 17 | * sort_by_age([]) = [] 18 | * sort_by_age() = throws a no list received error 19 | 20 | 21 | # func take_youngest(student list): youngest student 22 | 23 | This function receive a list of students and should get the youngest one using the `sort_by_age` method. 24 | 25 | * take_youngest() = throws a no list received error 26 | * take_youngest([]) = None 27 | * take_youngest([student('Joe', 20)]) = student('Joe', 20) 28 | * take_youngest([student('Joe', 20), student('Jane', 50), student('Felix', 10), student('Alex', 60)]) = student('Felix', 10) 29 | 30 | -------------------------------------------------------------------------------- /.github/workflows/windows_run.yaml: -------------------------------------------------------------------------------- 1 | name: Run on Windows 2 | 3 | # Trigger workflow manually 4 | on: 5 | workflow_dispatch: 6 | inputs: 7 | test: 8 | description: "Marsha test to run" 9 | required: true 10 | default: "./examples/general-purpose/sort_modules.mrsh" 11 | attempts: 12 | description: "Number of attempts to run the test" 13 | required: false 14 | default: 1 15 | parallel_runs: 16 | description: "Number of parallel runs" 17 | required: false 18 | default: 3 19 | 20 | jobs: 21 | run-windows: 22 | runs-on: windows-latest 23 | 
steps: 24 | - uses: actions/checkout@v3 25 | with: 26 | ref: ${{ github.event.client_payload.ref }} 27 | - name: Set up Python 3.10 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: "3.10" 31 | - name: Display Python version 32 | run: python -c "import sys; print(sys.version)" 33 | - name: Output Inputs 34 | run: echo "${{ toJSON(github.event.inputs) }}" 35 | - name: Run time script 36 | id: run 37 | shell: bash 38 | run: | 39 | python -m venv venv 40 | source venv/Scripts/activate 41 | python -m pip install --upgrade . 42 | echo "python -m marsha -d -n ${{github.event.inputs.parallel_runs}} -a ${{github.event.inputs.attempts}} ${{github.event.inputs.test}}" 43 | python -m marsha -d -n ${{github.event.inputs.parallel_runs}} -a ${{github.event.inputs.attempts}} ${{github.event.inputs.test}} 44 | env: 45 | OPENAI_SECRET_KEY: ${{ secrets.OPENAI_SECRET_KEY }} 46 | -------------------------------------------------------------------------------- /examples/apis/todos.mrsh: -------------------------------------------------------------------------------- 1 | # type task 2 | name, status 3 | cooking, pending 4 | dishes, completed 5 | cleaning, pending 6 | 7 | # func save_task(task name): task dict 8 | 9 | This function receives a task name and creates a `task` object with it. The initial status for all tasks is `pending`. The value is saved in a global dictionary. 10 | 11 | * save_task() = throws an error 12 | * save_task('test') = task('test', 'pending') 13 | 14 | # func update_task(task name): task dict 15 | 16 | This function receives a task name and updates the status to `completed` for the `task` with the received name. The value is updated in a global dictionary. 17 | 18 | * update_task() = throws an error 19 | * update_task('test') = task('test', 'completed') 20 | 21 | # func get(dictionary with name property): task dict 22 | 23 | This function gets the requested task name from the global dictionary and return the task object. 
24 | 25 | * get({'name': 'cooking'}) = task('cooking', 'pending') 26 | * get({'name': 'dishes'}) = task('dishes', 'completed') 27 | * get() = throws an error 28 | 29 | # func add(dictionary with name property): task dict 30 | 31 | This function calls the `save_task` function and take the task with the requested task name. 32 | 33 | * add({'name': 'cooking'}) = task('cooking', 'pending') 34 | * add({'name': 'dishes'}) = task('dishes', 'pending') 35 | * add() = throws an error 36 | 37 | # func complete(dictionary with name property): task dict 38 | 39 | This function calls the `update_task` function and take the task with the requested task name. 40 | 41 | * complete({'name': 'cooking'}) = task('cooking', 'completed') 42 | * complete({'name': 'dishes'}) = task('dishes', 'completed') 43 | * complete() = throws an error 44 | -------------------------------------------------------------------------------- /marsha/marsha.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | from PyInstaller.utils.hooks import collect_all 3 | 4 | datas = [('../examples', './examples'), ('./helper.py', './marsha')] 5 | binaries = [] 6 | hiddenimports = [] 7 | tmp_ret = collect_all('pyflakes') 8 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 9 | tmp_ret = collect_all('mccabe') 10 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 11 | tmp_ret = collect_all('pycodestyle') 12 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 13 | tmp_ret = collect_all('pydocstyle') 14 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 15 | tmp_ret = collect_all('pylama') 16 | datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] 17 | 18 | 19 | block_cipher = None 20 | 21 | 22 | a = Analysis( 23 | ['__main__.py'], 24 | pathex=[], 25 | binaries=binaries, 26 | datas=datas, 27 | hiddenimports=hiddenimports, 28 | 
hookspath=[], 29 | hooksconfig={}, 30 | runtime_hooks=[], 31 | excludes=[], 32 | win_no_prefer_redirects=False, 33 | win_private_assemblies=False, 34 | cipher=block_cipher, 35 | noarchive=False, 36 | ) 37 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) 38 | 39 | exe = EXE( 40 | pyz, 41 | a.scripts, 42 | a.binaries, 43 | a.zipfiles, 44 | a.datas, 45 | [], 46 | name='marsha', 47 | debug=False, 48 | bootloader_ignore_signals=False, 49 | strip=False, 50 | upx=True, 51 | upx_exclude=[], 52 | runtime_tmpdir=None, 53 | console=True, 54 | disable_windowed_traceback=False, 55 | argv_emulation=False, 56 | target_arch=None, 57 | codesign_identity=None, 58 | entitlements_file=None, 59 | ) 60 | -------------------------------------------------------------------------------- /examples/general-purpose/sort_modules.mrsh: -------------------------------------------------------------------------------- 1 | # func sort_modules(list of modules where each module has a name and a list of module names it depends on): list of module names where all modules it depends on are before it in the list 2 | 3 | This function takes a list of modules, where each module has a name property that is a string and a dependencies property that is a list of strings that are the names of modules it depends on. The set of modules are supposed to be a DAG (Directed Acyclic Graph) such that one or more module is a "root" module with zero dependencies, and zero or more modules depend on other modules but never directly or indirectly depend on themselves. 4 | 5 | The function determines an ordering from these "root" modules to the dependent modules such that all of their dependencies come earlier in the list. This allows processing of the modules in list order without needing to worry whether or not any particular module is missing its dependency. 6 | 7 | In the case that the dependencies defined are *not* a DAG, it should raise an error describing the cycle found. 
8 | 9 | * sort_modules([{"name": "a", "dependencies": []}, {"name": "b", "dependencies": ["c"]}, {"name": "c", "dependencies": ["a"]}) = ["a", "c", "b"] 10 | * sort_modules([{"name": "first_root", "dependencies": []}, {"name": "mid_node", "dependencies": ["first_root", "second_root"]}, {"name": "leaf", "dependencies": ["mid_node", "second_root"]}, {"name": "second_root", "dependencies": []}]) = ["first_root", "second_root", "mid_node", "leaf"] 11 | * sort_modules([{"name": "a", "dependencies": ["a"]}]) raises an error with the message: "Cycle detected: a -> a" 12 | * sort_modules([{"name": "a", "dependencies": ["b"]}, {"name": "b", "dependencies": ["c"]}, {"name": "c", "dependencies": ["a"]}]) raises an error with the message: "Cycle detected: a -> b -> c -> a" 13 | -------------------------------------------------------------------------------- /examples/data-oriented/data_mangling_complex.mrsh: -------------------------------------------------------------------------------- 1 | # type EmployeesByDepartment employees_by_department.csv 2 | 3 | # type DepartmentSkills department_skills.csv 4 | 5 | # type EmployeeSkills 6 | name, skill 7 | Bob, math 8 | Jake, spreadsheets 9 | Lisa, coding 10 | Sue, spreadsheets 11 | 12 | # func get_eng_skills(list of EmployeesByDepartment, list of DepartmentSkills): list of EmployeeSkills 13 | 14 | This function receives a list of EmployeesByDepartment and a list of DepartmentSkills. It should exclude any data from the input lists with missing or invalid data and filter through EmployeesByDepartment list to only include employees in the engineering department. The function should then create a response of EmployeeSkills merging the 2 input lists by department. 
* get_eng_skills() = throws an error
* get_eng_skills([EmployeesByDepartment('Joe', 'Accounting', '8/8/2023')]) = throws an error
* get_eng_skills([EmployeesByDepartment('Joe', 'Accounting')], []) = throws an error
* get_eng_skills([EmployeesByDepartment('Joe', 'Accounting', null)], []) = throws an error
* get_eng_skills([EmployeesByDepartment('Joe', 'Accounting', '1/1/0')], []) = throws an error
* get_eng_skills([], []) = []
* get_eng_skills([EmployeesByDepartment('Joe', 'Accounting', '8/8/2023')], []) = []
* get_eng_skills([], [DepartmentSkills('Accounting', 'math')]) = []
* get_eng_skills([EmployeesByDepartment('Joe', 'Accounting', '8/8/2023')], [DepartmentSkills('Accounting', 'math')]) = []
* get_eng_skills([EmployeesByDepartment('Joe', 'Accounting', '8/8/2012'), EmployeesByDepartment('Jake', 'Engineering', '8/8/2023')], [DepartmentSkills('Engineering', 'coding')]) = [EmployeeSkills('Jake', 'coding')]
* get_eng_skills([EmployeesByDepartment('Joe', 'Accounting', '8/8/2025'), EmployeesByDepartment('Jake', 'Engineering', '8/8/2021')], [DepartmentSkills('Accounting', 'math'), DepartmentSkills('Engineering', 'coding')]) = [EmployeeSkills('Jake', 'coding')]
= delta - mn * 60 20 | return f'''{format(mn, '2g')}min {prettify_time_delta(subdelta, max_depth - 1)}'''.rstrip() 21 | elif delta < 86400: 22 | hr = rnd(delta / 3600) 23 | subdelta = delta - hr * 3600 24 | return f'''{format(hr, '2g')}hr {prettify_time_delta(subdelta, max_depth - 1)}'''.rstrip() 25 | else: 26 | day = rnd(delta / 86400) 27 | subdelta = delta - day * 86400 28 | return f'''{format(day, '2g')}days {prettify_time_delta(subdelta, max_depth - 1)}'''.rstrip() 29 | 30 | 31 | def read_file(filename: str, mode: str = 'r'): 32 | with open(filename, mode) as f: 33 | content = f.read() 34 | return content 35 | 36 | 37 | def write_file(filename: str, content: str, mode: str = 'w'): 38 | with open(filename, mode) as f: 39 | f.write(content) 40 | 41 | 42 | def autoformat_files(files: list[str]): 43 | for file in files: 44 | before = read_file(file) 45 | after = autopep8.fix_code(before) 46 | write_file(file, after) 47 | 48 | 49 | def copy_file(src: str, dest: str): 50 | shutil.copyfile(src, dest) 51 | 52 | 53 | def copy_tree(src: str, dest: str): 54 | shutil.copytree(src, dest) 55 | 56 | 57 | def get_filename_from_path(path: str): 58 | return os.path.splitext(os.path.basename(path))[0] 59 | 60 | 61 | def add_helper(filename: str): 62 | helper = os.path.join(os.path.dirname( 63 | os.path.abspath(getsourcefile(lambda: 0))), 'helper.py') 64 | with open(filename, 'a') as o, open(helper, 'r') as i: 65 | o.write(i.read()) 66 | -------------------------------------------------------------------------------- /examples/marsha-misc/three_fns_file.mrsh: -------------------------------------------------------------------------------- 1 | # func fibonacci(integer): integer in the set of fibonacci numbers 2 | 3 | This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 
4 | 5 | fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 6 | 7 | * fibonacci(1) = 1 8 | * fibonacci(2) = 1 9 | * fibonacci(3) = 2 10 | * fibonacci(0) throws an error 11 | 12 | # func extract_args(function call string): list of all arguments 13 | 14 | This function extracts all arguments from a given function call string. Some arguments might be function calls themselves, so it need to work recursively in order to just keep constants and named variable. It returns a list of arguments (constants and named variables). 15 | 16 | * extract_args('fn()') = [] 17 | * extract_args('sum(a, b)') = ['a', 'b'] 18 | * extract_args('sum(a, sub(b, 0))') = ['a', 'b', 0] 19 | * extract_args('sum(a, sub(b, id(c)))') = ['a', 'b', 'c'] 20 | * extract_args('') throws a not a function error 21 | * extract_args('a') throws a not a function error 22 | * extract_args('what ever string') throws a not a function error 23 | * extract_args(3) throws an error for invalid type 24 | 25 | # type person 26 | name, age 27 | Joe, 20 28 | Jane, 50 29 | Felix, 10 30 | Alex, 60 31 | 32 | # func sort_by_age(person list, ascending boolean flag): person list ordered by age 33 | 34 | This function receives a list of `person` objects and return them ordered by age ascending or descending depending on the boolean flag. The default value for the ascending flag is true. 
35 | 36 | * sort_by_age([person('Joe', 20)]) = [person('Joe', 20)] 37 | * sort_by_age([person('Joe', 20)], false) = [person('Joe', 20)] 38 | * sort_by_age([person('Joe', 20), person('Jane', 50), person('Felix', 10), person('Alex', 60)]) = [person('Felix', 10), person('Joe', 20), person('Jane', 50), person('Alex', 60)] 39 | * sort_by_age([person('Joe', 20), person('Jane', 50), person('Felix', 10), person('Alex', 60)], true) = [person('Felix', 10), person('Joe', 20), person('Jane', 50), person('Alex', 60)] 40 | * sort_by_age([person('Joe', 20), person('Jane', 50), person('Felix', 10), person('Alex', 60)], false) = [person('Alex', 60), person('Jane', 50), person('Joe', 20), person('Felix', 10)] 41 | * sort_by_age([]) = [] 42 | * sort_by_age() = throws a no list received error 43 | -------------------------------------------------------------------------------- /examples/data-oriented/data_mangling_csv.mrsh: -------------------------------------------------------------------------------- 1 | # type EmployeesByDepartment employees_by_department.csv 2 | 3 | # type DepartmentSkills department_skills.csv 4 | 5 | # type EmployeeSkills 6 | name, skill 7 | Bob, math 8 | Jake, spreadsheets 9 | Lisa, coding 10 | Sue, spreadsheets 11 | 12 | # func get_employee_skills(list of EmployeesByDepartment, list of DepartmentSkills): list of EmployeeSkills 13 | 14 | This function receives a list of EmployeesByDepartment and a list of DepartmentSkills. The function should be able to create a response of EmployeeSkills merging the 2 list by department. Use the pandas library. 
15 | 16 | * get_employee_skills() = throws an error 17 | * get_employee_skills([EmployeesByDepartment(1, 'Joe', 'Accounting', '8/8/2003')]) = throws an error 18 | * get_employee_skills([], []) = [] 19 | * get_employee_skills([EmployeesByDepartment(1, 'Joe', 'Accounting', '8/8/2003')], []) = [] 20 | * get_employee_skills([], [DepartmentSkills('Accounting', 'math')]) = [] 21 | * get_employee_skills([EmployeesByDepartment(1, 'Joe', 'Accounting', '8/8/2003')], [DepartmentSkills('Accounting', 'math')]) = [EmployeeSkills('Joe', 'math')] 22 | * get_employee_skills([EmployeesByDepartment(1, 'Joe', 'Accounting', '8/8/2003'), EmployeesByDepartment(2, 'Jake', 'Engineering', '10/9/2005')], [DepartmentSkills('Accounting', 'math')]) = [EmployeeSkills('Joe', 'math')] 23 | * get_employee_skills([EmployeesByDepartment(1, 'Joe', 'Accounting', '8/8/2003'), EmployeesByDepartment(2, 'Jake', 'Engineering', '10/9/2005')], [DepartmentSkills('Accounting', 'math'), DepartmentSkills('Engineering', 'coding')]) = [EmployeeSkills('Joe', 'math'), EmployeeSkills('Jake', 'coding')] 24 | 25 | # func read_csv_file(path to file): file data without header 26 | 27 | This function should read the content of a CSV file located at the specified path and return the data without the header row. 28 | 29 | * read_csv_file() = throws an error 30 | * read_csv_file('./pathA') = '1,2,3\n3,4,5' 31 | 32 | # func process_data(path to file with EmployeesByDepartment, path to file with DepartmentSkills): EmployeeSkills list as csv formatted string 33 | 34 | This function uses `read_csv_file` to read the 2 csv files received and create the respective lists. Make sure to strip and lower each string property coming from the csv. Then, call and return the result from `get_employee_skills` as csv formatted string. 
35 | 36 | * process_data('/pathA', '') = throws an error 37 | * process_data('/pathA', '/pathB') = 'name, skill\nJoe, math' 38 | * process_data('/pathA', 'pathC') = 'name, skill\nJoe, math\nJake, coding' 39 | -------------------------------------------------------------------------------- /marsha/mappers/chatgpt.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import time 3 | 4 | from marsha.mappers.base import BaseMapper 5 | from marsha.stats import stats 6 | from marsha.utils import prettify_time_delta 7 | 8 | # Get time at startup to make human legible "start times" in the logs 9 | t0 = time.time() 10 | 11 | 12 | async def retry_chat_completion(query, model='gpt-3.5-turbo', max_tries=3, n_results=1): 13 | t1 = time.time() 14 | query['model'] = model 15 | query['n'] = n_results 16 | while True: 17 | try: 18 | out = await openai.ChatCompletion.acreate(**query) 19 | t2 = time.time() 20 | print( 21 | f'''Chat query took {prettify_time_delta(t2 - t1)}, started at {prettify_time_delta(t1 - t0)}, ms/chars = {(t2 - t1) * 1000 / out.get('usage', {}).get('total_tokens', 9001)}''') 22 | return out 23 | except openai.error.InvalidRequestError as e: 24 | if e.code == 'context_length_exceeded': 25 | # Try to cover up this error by choosing the bigger, more expensive model 26 | query['model'] = 'gpt-4' 27 | max_tries = max_tries - 1 28 | if max_tries == 0: 29 | raise e 30 | time.sleep(3 / max_tries) 31 | except Exception as e: 32 | max_tries = max_tries - 1 33 | if max_tries == 0: 34 | raise e 35 | time.sleep(3 / max_tries) 36 | if max_tries == 0: 37 | raise Exception('Could not execute chat completion') 38 | 39 | 40 | class ChatGPTMapper(BaseMapper): 41 | """ChatGPT-based mapper class""" 42 | 43 | def __init__(self, system, model='gpt-3.5-turbo', max_tokens=None, max_retries=3, n_results=1, stats_stage=None): 44 | BaseMapper.__init__(self) 45 | self.system = system 46 | self.model = model 47 | self.max_tokens = 
#!/usr/bin/env python
"""Time the execution of Marsha on the same source multiple times and report aggregate stats.

Runs ./dist/marsha `total_runs` times, recording exit codes, wall-clock times and
(optionally) the GPT call/cost stats emitted to stats.md, then writes aggregate
results to results.md and agg_stats.md.
"""

import argparse
import math
import os
import time

from marsha.utils import prettify_time_delta

from mistletoe import Document, ast_renderer


def _str_to_bool(value: str) -> bool:
    """Parse a CLI boolean string.

    argparse's `type=bool` coerces ANY non-empty string (including "False")
    to True, so a real parser is required for the `stats` positional.
    """
    return value.strip().lower() in ('true', '1', 'yes', 'y')


# Parse the input arguments
parser = argparse.ArgumentParser(
    prog='.time.py',
    description='Time the execution of Marsha on the same source multiple times'
)
parser.add_argument('source')
parser.add_argument('attempts', type=int, default=3)
parser.add_argument('n_parallel_executions', type=int, default=1)
# Fix: was `type=bool`, which treated every non-empty string as True.
parser.add_argument('stats', type=_str_to_bool, default=False)
args = parser.parse_args()

exitcodes = []   # exit code of each run (0 == success)
times = []       # wall-clock duration of each run, in seconds
calls = []       # GPT calls per run (populated only when stats are enabled)
cost = []        # estimated cost per run (populated only when stats are enabled)
total_runs = 30
# Fix: `{args.stats and "-s"}` rendered the literal token "False" into the
# command line when stats were disabled; emit "-s" or nothing instead.
stats_flag = '-s' if args.stats else ''
for i in range(total_runs):
    print(f'Run {i + 1} / {total_runs}')
    t_1 = time.time()
    cmd = f'./dist/marsha {args.source} -a {args.attempts} -n {args.n_parallel_executions} {stats_flag}'.rstrip()
    print(f'Running {cmd}')
    exitcode = os.system(cmd)
    t_2 = time.time()
    testtime = t_2 - t_1
    exitcodes.append(exitcode)
    times.append(testtime)
    if args.stats:
        try:
            # Marsha writes per-run stats to stats.md when -s is passed.
            with open('stats.md', 'r') as run_stats_file:
                run_stats = run_stats_file.read()
        except Exception as e:
            raise Exception(
                'Error reading stats file. Maybe something went wrong while running Marsha and the stats were not generated?') from e
        try:
            # The results section is the last child of the markdown AST.
            ast = ast_renderer.get_ast(Document(run_stats))
            results_child = ast['children'].pop()
            calls.append(int(results_child[
                'children'][2]['content'].split('Total calls: ').pop()))
            cost.append(float(results_child[
                'children'][6]['content'].split('Total cost: ').pop()))
        except Exception as e:
            # A malformed stats file should not abort the whole timing run.
            print(f'Error: {e}')
            calls.append(0)
            cost.append(0)
        with open('agg_stats.md', 'a') as f:
            f.write(f'''# Run {i + 1} / {total_runs}
Exit code: {exitcode}
Time: {prettify_time_delta(testtime)}
Stats:

```md
{run_stats}
```

''')


successes = [code == 0 for code in exitcodes]
# Time calculations
totaltime = sum(times)
avgtime = totaltime / total_runs
square_errors = [(t - avgtime) ** 2 for t in times]
stddevtime = math.sqrt(sum(square_errors) / total_runs)
# Call calculations
totalcalls = sum(calls)
avgcalls = round(totalcalls / total_runs, 2)
square_errors = [(c - avgcalls) ** 2 for c in calls]
stddevcalls = round(math.sqrt(sum(square_errors) / total_runs), 2)
# Cost calculations
totalcost = round(sum(cost), 2)
avgcost = round(totalcost / total_runs, 2)
square_errors = [(c - avgcost) ** 2 for c in cost]
stddevcost = round(math.sqrt(sum(square_errors) / total_runs), 2)

results = f'''
# Test results
`{sum(successes)} / {total_runs} runs successful`
**Avg Runtime**: `{prettify_time_delta(avgtime)} +/- {prettify_time_delta(stddevtime)}`
**Avg GPT calls**: `{avgcalls} +/- {stddevcalls}`
**Avg cost**: `{avgcost} +/- {stddevcost}`
**Total cost**: `{totalcost}`
'''
print(results)
with open('results.md', 'w') as res_file:
    res_file.write(results)

if args.stats:
    with open('agg_stats.md', 'r') as f:
        stats = f.read()
    print(stats)
36 | 37 | Most importantly, be civil on both proposals and reviews. `iasql` is meant to be an approachable tool for developers and if we want to make it better we need to be approachable to each other. Some parts of the language may have been mistakes, but they certainly weren't intentional and all parts were thought over by prior contributors. New proposals come from people who see something that doesn't sit well with them and they have put forth the energy to write a proposal and we should be thankful that they care and want to make it better. 38 | 39 | Ideally everyone can come to a refined version of the RFC that satisfies all arguments and is better than what anyone person could have come up with, but if an RFC is divisive, the "winning" side should be gracious, and the "losing" side should hopefully accept that the proposal was contentious. 40 | 41 | ### Alternatives Considered 42 | 43 | After proposing the solution, any and all alternatives should be listed along with reasons why they are rejected. 44 | 45 | Authors should *not* reject alternatives just because they don't "like" them, there should be a more solid reason 46 | 47 | Reviewers should *not* complain about a lack of detail in the alternative descriptions especially if that is their own preferred solution -- they should attempt to positively describe the solution and bring their own arguments and proof for it. 48 | 49 | ## Expected Semver Impact 50 | 51 | A brief description of the expected impact on the Semantic versioning. 52 | 53 | Would this be considered a patch (no user-facing changes, but internal architectural changes. Bug fixes, new modules)? 54 | 55 | Would this be considered a minor update (new functionality with zero impact on existing functionality. API changes, new iasql functions?)? 56 | 57 | Would this be considered a major update (breaking the behavior of existing code)? 
name: Run time script

# Trigger workflow manually
on:
  pull_request:
    branches: [main]
  repository_dispatch:
    types: [time]
  workflow_dispatch:
    inputs:
      test:
        description: "Marsha test to run"
        required: true
        default: "../examples/general-purpose/sort_modules.mrsh"
      attempts:
        description: "Number of attempts to run the test"
        required: false
        default: 1
      parallel_runs:
        description: "Number of parallel runs"
        required: false
        default: 3

jobs:
  run:
    runs-on: ubuntu-latest
    outputs:
      results: ${{ steps.run.outputs.results }}
    steps:
      - uses: actions/checkout@v3
        with:
          ref: ${{ github.event.client_payload.ref }}
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"
      - name: Output Inputs
        run: echo "${{ toJSON(github.event.inputs) }}"
      - name: Output Payload
        run: echo "${{ toJSON(github.event.client_payload) }}"
      - name: Run time script
        id: run
        run: |
          cd marsha
          if ${{ github.event.client_payload.test != '' }}; then
            make attempts=${{github.event.client_payload.attempts}} test=${{github.event.client_payload.test}} stats=True n_parallel_executions=${{github.event.client_payload.parallel_runs}} time
          elif ${{ github.event.inputs.test != '' }}; then
            make attempts=${{github.event.inputs.attempts}} test=${{github.event.inputs.test}} stats=True n_parallel_executions=${{github.event.inputs.parallel_runs}} time
          else
            make attempts=1 test=../examples/general-purpose/sort_modules.mrsh stats=True n_parallel_executions=3 time
          fi
          results=$(echo $(cat results.md) | tr '\n' ' ' | sed "s/# Test results //")
          echo "results=$results" >> $GITHUB_OUTPUT
        env:
          OPENAI_SECRET_KEY: ${{ secrets.OPENAI_SECRET_KEY }}

  notify:
    name: Discord Notification
    runs-on: ubuntu-latest
    needs: # make sure the notification is sent AFTER the jobs you want included have completed
      - run
    if: ${{ always() }} # You always want to be notified: success, failure, or cancelled

    steps:
      - name: Extract branch name
        shell: bash
        run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT
        id: extract_branch
      - name: Notify from input
        uses: nobrayner/discord-webhook@v1
        if: ${{ github.event.inputs.test != '' }}
        with:
          github-token: ${{ secrets.github_token }}
          discord-webhook: ${{ secrets.DISCORD_WEBHOOK }}
          title: '${{ github.workflow }}: {{STATUS}}'
          description: '**Branch:** `${{ steps.extract_branch.outputs.branch }}` **Test file:** `${{ github.event.inputs.test }}` **Parallelism:** `${{ github.event.inputs.parallel_runs }}` **Results:** ${{ needs.run.outputs.results }}'
          include-details: false
      - name: Notify from payload
        uses: nobrayner/discord-webhook@v1
        if: ${{ github.event.client_payload.test != '' }}
        with:
          github-token: ${{ secrets.github_token }}
          discord-webhook: ${{ secrets.DISCORD_WEBHOOK }}
          title: '${{ github.workflow }}: {{STATUS}}'
          description: '**Branch:** `${{ github.event.client_payload.ref }}` **Test file:** `${{ github.event.client_payload.test }}` **Parallelism:** `${{ github.event.client_payload.parallel_runs }}` **Results:** ${{ needs.run.outputs.results }}'
          include-details: false
      - name: Notify from PR
        uses: nobrayner/discord-webhook@v1
        # Fix: the condition previously compared client_payload.test twice; the
        # PR notification must fire only when NEITHER the workflow_dispatch
        # input NOR the repository_dispatch payload set a test.
        if: ${{ github.event.inputs.test == '' && github.event.client_payload.test == '' }}
        with:
          github-token: ${{ secrets.github_token }}
          discord-webhook: ${{ secrets.DISCORD_WEBHOOK }}
          title: '${{ github.workflow }}: {{STATUS}}'
          description: '**Branch:** `${{ github.ref_name }}` **Test file:** `../examples/general-purpose/sort_modules.mrsh` **Parallelism:** `3` **Results:** ${{ needs.run.outputs.results }}'
          include-details: false
# OpenAI pricing tiers, keyed by model family.
# Format: list of (context_limit_tokens, price) pairs; price is per 1024 tokens.
PRICING_MODEL = {
    'gpt35': {
        'in': [(4096, 0.0015), (16384, 0.002)],
        'out': [(4096, 0.002), (16384, 0.004)]
    },
    'gpt4': {
        'in': [(8192, 0.03), (32768, 0.06)],
        'out': [(8192, 0.06), (32768, 0.12)]
    }
}


def _tier_price(tiers: list, tokens: int) -> float:
    """Return the per-1024-token price of the smallest tier that fits `tokens`.

    Falls back to the largest tier's price when `tokens` exceeds every tier
    limit, so oversized calls are still accounted for (previously such calls
    silently added no cost at all).
    """
    for limit, price in tiers:
        if tokens <= limit:
            return price
    return tiers[-1][1]


class ModelStats:
    """Token and cost counters for a single model within one pipeline stage."""

    def __init__(self, name, input_tokens, output_tokens, input_cost, output_cost, total_cost):
        self.name = name
        self.input_tokens = input_tokens
        self.output_tokens = output_tokens
        self.input_cost = input_cost
        self.output_cost = output_cost
        self.total_cost = total_cost


class StageStats:
    """Timing/call/cost statistics for one compiler stage, split per model."""

    def __init__(self, name, total_time, total_calls):
        self.name = name
        self.total_time = total_time
        self.total_calls = total_calls
        self.gpt35 = ModelStats('gpt-3.5-turbo', 0, 0, 0, 0, 0)
        self.gpt4 = ModelStats('gpt-4', 0, 0, 0, 0, 0)


class MarshaStats:
    """Run-wide statistics accumulator covering the three compiler stages."""

    def __init__(self):
        self.total_time = 0
        self.total_calls = 0
        self.attempts = 0
        self.total_cost = 0
        self.first_stage = StageStats('first_stage', 0, 0)
        self.second_stage = StageStats('second_stage', 0, 0)
        self.third_stage = StageStats('third_stage', 0, 0)

    def aggregate(self, total_time, attempts):
        """Roll the per-stage counters up into run totals."""
        self.total_time = total_time
        self.attempts = attempts
        self.total_calls = self.first_stage.total_calls + \
            self.second_stage.total_calls + self.third_stage.total_calls
        self.total_cost = self.first_stage.gpt35.total_cost + self.first_stage.gpt4.total_cost + self.second_stage.gpt35.total_cost + \
            self.second_stage.gpt4.total_cost + \
            self.third_stage.gpt35.total_cost + self.third_stage.gpt4.total_cost

    def stage_update(self, stage: str, res: list):
        """Fold a batch of OpenAI completion responses into the given stage.

        `stage` is one of 'first_stage' | 'second_stage' | 'third_stage'.
        Each response in `res` must expose `.model` and
        `.usage.prompt_tokens` / `.usage.completion_tokens`.
        """
        stage_stats = getattr(self, stage)
        stage_stats.total_calls += len(res)
        for r in res:
            model_key = 'gpt4' if r.model.startswith('gpt-4') else 'gpt35'
            model_stats = getattr(stage_stats, model_key)
            pricing = PRICING_MODEL[model_key]
            input_tokens = r.usage.prompt_tokens
            output_tokens = r.usage.completion_tokens
            input_cost = input_tokens * \
                _tier_price(pricing['in'], input_tokens) / 1024
            output_cost = output_tokens * \
                _tier_price(pricing['out'], output_tokens) / 1024
            model_stats.input_tokens += input_tokens
            model_stats.output_tokens += output_tokens
            model_stats.input_cost += input_cost
            model_stats.output_cost += output_cost
            # Bug fix: add only this response's incremental cost. The previous
            # implementation re-added the *cumulative* input/output cost on
            # every response, inflating total_cost on every call after the
            # first (e.g. two identical calls yielded 3x the real cost).
            model_stats.total_cost += input_cost + output_cost

    def to_file(self, filename: str = 'stats.md'):
        """Write the markdown report produced by __str__ to `filename`."""
        write_file(filename, content=self.__str__())

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        return f'''# Stats

## First stage
Total time: {self.first_stage.total_time}
Total calls: {self.first_stage.total_calls}
Total cost: {self.first_stage.gpt35.total_cost + self.first_stage.gpt4.total_cost}

## Second stage
Total time: {self.second_stage.total_time}
Total calls: {self.second_stage.total_calls}
Total cost: {self.second_stage.gpt35.total_cost + self.second_stage.gpt4.total_cost}

## Third stage
Total time: {self.third_stage.total_time}
Total calls: {self.third_stage.total_calls}
Total cost: {self.third_stage.gpt35.total_cost + self.third_stage.gpt4.total_cost}

## Total
Total time: {self.total_time}
Total calls: {self.total_calls}
Attempts: {self.attempts}
Total cost: {self.total_cost}
'''


"""
Source: https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-subobjects-chained-properties
"""


def rsetattr(obj, attr, val):
    """setattr() accepting a dotted path, e.g. 'first_stage.gpt35.total_cost'.

    No longer used by stage_update, but kept for external callers.
    """
    pre, _, post = attr.rpartition('.')
    return setattr(rgetattr(obj, pre) if pre else obj, post, val)


def rgetattr(obj, attr, *args):
    """getattr() accepting a dotted path; extra args are the default value."""
    def _getattr(obj, attr):
        return getattr(obj, attr, *args)
    return functools.reduce(_getattr, [obj] + attr.split('.'))


# Module-level singleton shared by the whole compiler pipeline.
stats = MarshaStats()
21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "id": "18b4b4b3-fd42-40a8-97c7-98f05615ca52", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "marsha_filename = 'employee_skills.mrsh'\n", 31 | "with open(marsha_filename, 'w') as f:\n", 32 | " marsha_content = f'''\n", 33 | "# type EmployeesByDepartment ./employees_by_department.csv\n", 34 | "\n", 35 | "\n", 36 | "# type DepartmentSkills ./department_skills.csv\n", 37 | "\n", 38 | "\n", 39 | "# type EmployeeSkills\n", 40 | "name, skill\n", 41 | "Bob, math\n", 42 | "Jake, spreadsheets\n", 43 | "Lisa, coding\n", 44 | "Sue, spreadsheets\n", 45 | "\n", 46 | "\n", 47 | "# func get_employee_skills(list of EmployeesByDepartment, list of DepartmentSkills): list of EmployeeSkills\n", 48 | "\n", 49 | "This function receives a list of EmployeesByDepartment and a list of DepartmentSkills. \n", 50 | "The function should be able to create a response of EmployeeSkills merging the 2 list by department.\n", 51 | "Use the pandas library.\n", 52 | "\n", 53 | "* get_employee_skills() = throws an error\n", 54 | "* get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')]) = throws an error\n", 55 | "* get_employee_skills([], []) = []\n", 56 | "* get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')], []) = []\n", 57 | "* get_employee_skills([], [DepartmentSkills('Accounting', 'math')]) = []\n", 58 | "* get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')], [DepartmentSkills('Accounting', 'math')]) = [EmployeeSkills('Joe', 'math')]\n", 59 | "* get_employee_skills([EmployeesByDepartment('Joe', 'Accounting'), EmployeesByDepartment('Jake', 'Engineering')], [DepartmentSkills('Accounting', 'math')]) = [EmployeeSkills('Joe', 'math')]\n", 60 | "* get_employee_skills([EmployeesByDepartment('Joe', 'Accounting'), EmployeesByDepartment('Jake', 'Engineering')], [DepartmentSkills('Accounting', 'math'), DepartmentSkills('Engineering', 'coding')]) = [EmployeeSkills('Joe', 
'math'), EmployeeSkills('Jake', 'coding')]\n", 61 | "\n", 62 | "\n", 63 | "# func read_csv_file(path to file): file data without header\n", 64 | "\n", 65 | "This function read a CSV file and return the csv content without header.\n", 66 | "\n", 67 | "\n", 68 | "# func process_data(path to file with EmployeesByDepartment, path to file with DepartmentSkills): list of EmployeeSkills\n", 69 | "\n", 70 | "This function uses `read_csv_file` to read the 2 csv files received and create the respective lists. Make sure to strip and lower each string property coming from the csv. Then, call and return the result from `get_employee_skills`.\n", 71 | "\n", 72 | "* process_data('/pathA', '') = throws an error\n", 73 | "* process_data('/pathA', '/pathB') = [EmployeeSkills('Joe', 'math')]\n", 74 | "* process_data('/pathA', 'pathC') = [EmployeeSkills('Joe', 'math'), EmployeeSkills('Jake', 'coding')]\n", 75 | "'''\n", 76 | " f.write(marsha_content)\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "id": "20544e71-9869-4898-98bd-04b5431529bf", 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "!python -m marsha ./\"$marsha_filename\"" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "id": "0658fd25-948b-4c98-a443-4a5b63b18073", 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "# Install marsha generated code requirements\n", 97 | "%pip install -r requirements.txt" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "3e69c211-20b1-47b5-8e4c-7c5c20589974", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "# Install additional dependencies\n", 108 | "%pip install matplotlib" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "id": "407a780a", 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "import pandas as pd\n", 119 | "import matplotlib.pyplot as plt\n", 120 | "\n", 121 | 
"from employee_skills import process_data\n", 122 | "\n", 123 | "employee_skills_list = process_data('./employees_by_department.csv', './department_skills.csv')\n", 124 | "employee_skills_df = pd.DataFrame([(e.name, e.skill) for e in employee_skills_list], columns=[\"Name\", \"Skill\"])\n", 125 | "skill_counts = employee_skills_df[\"Skill\"].value_counts()\n", 126 | "\n", 127 | "plt.figure(figsize=(8, 4))\n", 128 | "plt.pie(skill_counts, labels=skill_counts.index, autopct=\"%1.1f%%\")\n", 129 | "plt.title(\"Employee Skills\")\n", 130 | "plt.show()" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "id": "daa6ebc5-1bd9-455e-beb9-dabfbc4ca1f4", 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "Python 3 (ipykernel)", 145 | "language": "python", 146 | "name": "python3" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 3 152 | }, 153 | "file_extension": ".py", 154 | "mimetype": "text/x-python", 155 | "name": "python", 156 | "nbconvert_exporter": "python", 157 | "pygments_lexer": "ipython3", 158 | "version": "3.10.12" 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 5 163 | } 164 | -------------------------------------------------------------------------------- /marsha/parse.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from mistletoe import Document, ast_renderer 4 | 5 | from marsha.meta import MarshaMeta, to_markdown 6 | from marsha.utils import write_file 7 | 8 | 9 | def format_marsha_for_llm(meta: MarshaMeta): 10 | break_line = '\n' 11 | res = [f'# Requirements for file `{meta.filename}`'] 12 | for func in meta.functions + meta.void_funcs: 13 | ast = ast_renderer.get_ast(Document(func)) 14 | if ast['children'][0]['type'] != 'Heading': 15 | raise Exception('Invalid Marsha function') 16 | name = '' 17 | args = [] 
18 | ret = '' 19 | desc_parts = [] 20 | reqs = '' 21 | list_started = False 22 | for (i, child) in enumerate(ast['children']): 23 | if i == 0: 24 | # Special handling for the initial header (for now) 25 | if child['type'] != 'Heading': 26 | raise Exception('Invalid Marsha function') 27 | header = child['children'][0]['content'] 28 | name = header.split('(')[0].split('func')[1].strip() 29 | args = [arg.strip() 30 | for arg in header.split('(')[1].split(')')[0].split(',')] 31 | end = header.split('):') 32 | if len(end) == 1: 33 | ret = 'None' 34 | else: 35 | ret = header.split('):')[1].strip() 36 | continue 37 | if child['type'] == 'List': 38 | list_started = True 39 | reqs = to_markdown(child) 40 | continue 41 | if list_started: 42 | raise Exception( 43 | 'Function description must come *before* usage examples') 44 | desc_parts.append(to_markdown(child)) 45 | desc = '\n\n'.join(desc_parts) 46 | 47 | arg_fmt = '\n'.join( 48 | [f'{i + 1}. {arg}' for (i, arg) in enumerate(args)]) 49 | 50 | fn_def = f'''## Requirements for function `{name}` 51 | 52 | ### Inputs 53 | 54 | {arg_fmt} 55 | 56 | ### Output 57 | 58 | {ret} 59 | 60 | ### Description 61 | 62 | {desc} 63 | 64 | {f"""### Examples of expected behavior 65 | 66 | {reqs}""" if len(reqs) > 0 else ''} 67 | ''' 68 | res.append(fn_def) 69 | if meta.types is not None: 70 | res.append('## Convert the following type into classes') 71 | for defined_type in meta.types: 72 | type_def = f''' 73 | ##{defined_type} 74 | ''' 75 | res.append(type_def) 76 | return break_line.join(res) 77 | 78 | 79 | # TODO: Potentially re-org this so the stages are together? 
def _structure_ok(children: list, expected_headings: list) -> bool:
    """Check an alternating Heading/CodeFence pattern against heading texts.

    `children[2*i]` must be a Heading whose text equals `expected_headings[i]`
    and `children[2*i + 1]` must be a CodeFence.
    """
    for i, heading_text in enumerate(expected_headings):
        heading = children[2 * i]
        fence = children[2 * i + 1]
        if heading['type'] != 'Heading' or fence['type'] != 'CodeFence':
            return False
        if heading['children'][0]['content'].strip() != heading_text:
            return False
    return True


def validate_first_stage_markdown(md, marsha_filename):
    """Validate the first-stage LLM response markdown.

    Accepts either of two shapes, each heading followed by a code fence:
      * `<name>.py` + `<name>_test.py` (4 AST nodes), or
      * `<name>.py` + `requirements.txt` + `<name>_test.py` (6 AST nodes).
    Returns True when the document matches, False otherwise.
    """
    children = ast_renderer.get_ast(Document(md))['children']
    if len(children) == 4:
        expected = [f'{marsha_filename}.py', f'{marsha_filename}_test.py']
    elif len(children) == 6:
        expected = [f'{marsha_filename}.py', 'requirements.txt',
                    f'{marsha_filename}_test.py']
    else:
        return False
    return _structure_ok(children, expected)


def validate_second_stage_markdown(md, filename):
    """Validate a second-stage response: exactly one heading named `filename`
    followed by one code fence."""
    children = ast_renderer.get_ast(Document(md))['children']
    if len(children) != 2:
        return False
    return _structure_ok(children, [filename])


def write_files_from_markdown(md: str, subdir=None) -> list[str]:
    """Write each Heading+CodeFence pair in `md` to disk.

    The heading text is the target filename (re-rooted under `subdir` when
    given); the following code fence holds its content. Returns the list of
    filenames actually written; pairs with an empty code fence are skipped.
    """
    ast = ast_renderer.get_ast(Document(md))
    filenames = []
    filename = ''
    for section in ast['children']:
        if section['type'] == 'Heading':
            filename = section['children'][0]['content']
            if subdir is not None:
                # Bug fix: the heading's filename was previously discarded in
                # this branch, so every file in a subdir collided on the same
                # placeholder path.
                filename = f'{subdir}/{filename}'
            filenames.append(filename)
        elif section['type'] == 'CodeFence':
            filedata = section['children'][0]['content']
            if filedata is None or filedata == '':
                # If there's no data and we are not going to write the file,
                # remove its name from the filenames list.
                filenames.pop()
                continue
            if subdir is not None:
                os.makedirs(os.path.dirname(filename), exist_ok=True)
            write_file(filename, filedata)
    return filenames


def extract_func_name(type) -> str:
    """Return the function name from a Marsha `# func name(...)` heading."""
    ast = ast_renderer.get_ast(Document(type))
    if ast['children'][0]['type'] != 'Heading':
        raise Exception('Invalid Marsha function')
    header = ast['children'][0]['children'][0]['content']
    return header.split('(')[0].split('func')[1].strip()
async def main():
    """Compile a Marsha source file into working Python code.

    Per attempt:
      1. generate candidate implementations (first stage),
      2. write each candidate to its own temp dir,
      3. race review/fix tasks in parallel and keep the first success,
      4. copy the winning code, test file and requirements next to the source.

    Raises when every attempt fails to produce working code.
    """
    t1 = time.time()
    input_file = args.source
    # Name without extension
    meta = await MarshaMeta(input_file).populate()
    print(f'Compiling functions for {meta.filename}...')
    quick_and_dirty = args.quick_and_dirty
    debug = args.debug
    should_write_stats = args.stats
    attempts = args.attempts
    n_results = args.n_parallel_executions
    if debug:
        print(f'Number of attempts: {attempts}')
        print(f'Number of parallel executions: {n_results}')
    while attempts:
        attempts = attempts - 1
        # First stage: generate code for functions and classes
        try:
            mds = await generate_python_code(args, meta, n_results, debug)
        except Exception:
            continue
        # Early exit if quick and dirty
        if quick_and_dirty:
            print('Writing generated code to files...')
            for md in mds[:2]:
                write_files_from_markdown(md)
            attempts = attempts + 1
            break
        # Write generated code to temporary files for the correction stages
        file_groups = []
        tmp_directories = []
        for idx, md in enumerate(mds):
            print('Writing generated code to temporary files...')
            tmpdir = tempfile.TemporaryDirectory(
                suffix=f'_-_{meta.filename}_{idx}')
            tmp_directories.append(tmpdir)
            file_groups.append(
                write_files_from_markdown(md, subdir=tmpdir.name))
        if debug:
            for filename in [filename for file_group in file_groups for filename in file_group]:
                # Bug fix: the debug dump previously printed a placeholder
                # instead of the filename being shown.
                print(f'# {filename}\n{read_file(filename)}\n')
        # Race the review/fix tasks; the task name carries the candidate's
        # main filename so the winner can be identified.
        tasks = []
        for file_group in file_groups:
            tasks.append(asyncio.create_task(
                review_and_fix(args, meta, file_group, debug), name=file_group[0]))
        try:
            done_task_name = await run_parallel_tasks(tasks)
            print('Writing generated code to files...')
            filename = done_task_name
            copy_file(filename, f'{meta.filename}.py')
            if not args.exclude_main_helper:
                add_helper(f'{meta.filename}.py')
            test_filename = filename.replace('.py', '_test.py')
            copy_file(test_filename, f'{meta.filename}_test.py')
            directory = os.path.dirname(filename)
            requirements_filename = os.path.join(
                directory, 'requirements.txt')
            if os.path.exists(requirements_filename):
                copy_file(requirements_filename, 'requirements.txt')
        except Exception as e:
            print('Failed to generate working code.')
            print(e)
            if debug:
                traceback.print_tb(e.__traceback__)
            # Keep the failed candidates around for post-mortem debugging
            for tmpdir in tmp_directories:
                tmpdir_suffix = tmpdir.name.split('_-_')[-1]
                copy_tree(tmpdir.name, f'{tmpdir_suffix}_failed')
            print('Retrying...')
            continue
        finally:
            cleanup_tmp_directories(tmp_directories)
        # Done! Add one back to `attempts` to avoid accidentally erroring out on success
        attempts = attempts + 1
        break
    if attempts == 0:
        t2 = time.time()
        stats.aggregate(prettify_time_delta(t2 - t1), args.attempts)
        if should_write_stats:
            stats.to_file()
        raise Exception(
            f'Failed to generate working code for {meta.filename}. Total time elapsed: {prettify_time_delta(t2 - t1)}. Total cost: {round(stats.total_cost, 2)}.')
    t2 = time.time()
    stats.aggregate(prettify_time_delta(t2 - t1), args.attempts - attempts + 1)
    if should_write_stats:
        stats.to_file()
    print(
        f'{meta.filename} done! Total time elapsed: {prettify_time_delta(t2 - t1)}. Total cost: {round(stats.total_cost, 2)}.')


async def run_parallel_tasks(tasks: list) -> str:
    """Race tasks and return the name of the first one to succeed.

    Waits until at least one task completes. If any completed task finished
    without an exception, the remaining pending tasks are cancelled and the
    winner's name is returned; otherwise the wait recurses on the pending
    tasks. When every task has failed, the last observed exception is raised.
    """
    print('Running tasks in parallel...')
    done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
    # Bug fix: more than one task can land in `done` simultaneously; the old
    # code popped a single arbitrary task and could discard a success that had
    # already completed alongside a failure.
    winner = next((t for t in done if t.exception() is None), None)
    if winner is not None:
        print('Task completed successfully. Cancelling pending tasks...')
        for task in pending:
            task.cancel()
        return winner.get_name()
    if len(pending) > 0:
        print('Task completed with error. Waiting for pending tasks to finish...')
        return await run_parallel_tasks(pending)
    print('All tasks failed. Raising exception...')
    failure = next(
        (t.exception() for t in done if t.exception() is not None), None)
    if failure is not None:
        raise failure
    raise Exception('All tasks failed.')


def cleanup_tmp_directories(tmp_directories: list):
    """Best-effort cleanup of TemporaryDirectory handles.

    A failed delete (e.g. a file still held open) must not mask the compile
    result, so errors are deliberately swallowed.
    """
    for tmp_directory in tmp_directories:
        try:
            tmp_directory.cleanup()
        except Exception:
            pass
# Defined inside the generated script's `--serve` CLI branch. `func_names` and
# `lookup` are module globals built earlier from `globals()`: the names of all
# callables defined in the generated module.
class MarshaServer(BaseHTTPRequestHandler):
    """Minimal REST wrapper exposing each generated function at /<func_name>.

    GET is only valid for zero-argument functions; POST passes the request
    body as the function's argument(s).
    """

    def do_GET(self):
        """Call a zero-argument function and return its result as JSON."""
        # Path is /<func_name>; anything after a second slash is ignored here.
        func_name = self.path.split('/')[1]
        if func_name not in func_names:
            self.send_response(404)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(
                bytes('{"error": "' + self.path + ' does not exist"}', 'utf-8'))
            return
        func = lookup[func_name]
        # GET carries no body, so only nullary functions may be called this way.
        if func.__code__.co_argcount != 0:
            self.send_response(400)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(
                bytes('{"error": "' + self.path + ' is not a GET path"}', 'utf-8'))
            return
        out = func()
        # NOTE(review): assumes `out` is JSON-serializable — confirm for the
        # generated functions; json.dumps raises TypeError otherwise.
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()
        self.wfile.write(bytes(json.dumps(out), 'utf-8'))

    def do_POST(self):
        """Call a function with the POST body as its argument(s).

        An application/json body that parses to a list is splatted as
        positional arguments; any other JSON value (or a raw text body) is
        passed as a single argument. Function errors become HTTP 400.
        """
        func_name = self.path.split('/')[1]
        if func_name not in func_names:
            self.send_response(404)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(
                bytes('{"error": "' + self.path + ' does not exist"}', 'utf-8'))
            return
        func = lookup[func_name]
        content_len = int(self.headers.get('Content-Length', 0))
        post_body = self.rfile.read(content_len)
        post_payload = None
        # The Content-Type header decides both input parsing and output format.
        is_json = self.headers.get_content_type() == 'application/json'
        if is_json:
            try:
                post_payload = json.loads(post_body)
            except Exception:
                self.send_response(400)
                self.send_header('Content-Type', 'application/json')
                self.end_headers()
                self.wfile.write(
                    bytes('{"error": "Invalid JSON provided"}', 'utf-8'))
                return
        else:
            post_payload = post_body.decode('utf-8')
        out = None
        try:
            if type(post_payload) is list:
                # JSON arrays map to positional arguments.
                out = func(*post_payload)
            else:
                out = func(post_payload)
            self.send_response(200)
        except Exception as e:
            # Surface the function's error message with a 400 status.
            self.send_response(400)
            if is_json:
                self.send_header('Content-Type', 'application/json')
                self.end_headers()
                self.wfile.write(
                    bytes('{"error": "' + str(e) + '"}', 'utf-8'))
            else:
                self.send_header('Content-Type', 'text/plain')
                self.end_headers()
                self.wfile.write(bytes(str(e), 'utf-8'))
            return
        if is_json:
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(out), 'utf-8'))
        else:
            # NOTE(review): assumes `out` is a str in the text/plain branch —
            # bytes(out, 'utf-8') raises TypeError for non-string results;
            # confirm generated functions return strings when called this way.
            self.send_header('Content-Type', 'text/plain')
            self.end_headers()
            self.wfile.write(bytes(out, 'utf-8'))
147 | else: 148 | try: 149 | parsed_param = json.loads(param) 150 | as_json = True 151 | except Exception: 152 | parsed_param = param 153 | as_json = False 154 | else: 155 | if args.force_json: 156 | parsed_param = [json.loads(param) for param in args.params] 157 | as_json = True 158 | elif args.force_text: 159 | parsed_param = args.params 160 | as_json = False 161 | else: 162 | parsed_param = [] 163 | as_json = False 164 | for param in args.params: 165 | try: 166 | parsed = json.loads(param) 167 | parsed_param.append(parsed) 168 | as_json = True 169 | except Exception: 170 | parsed_param.append(param) 171 | if type(parsed_param) is list: 172 | out = func(*parsed_param) 173 | else: 174 | out = func(parsed_param) 175 | if args.outfile is not None: 176 | file = open(args.outfile, 'w') 177 | file.write(json.dumps(out) if as_json else out) 178 | file.close() 179 | else: 180 | print(json.dumps(out) if as_json else out) 181 | -------------------------------------------------------------------------------- /marsha/meta.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | from mistletoe import Document, ast_renderer 5 | 6 | from marsha.utils import read_file, get_filename_from_path 7 | 8 | 9 | def to_markdown(node): 10 | # Technically I should iterate on the `children` lists every time because they could have more 11 | # than one, but since this is hardwired for each node type, I'm just going to use the actual 12 | # implementations to skip that when possible to reduce recursion depth and simplify the code 13 | if node['type'] == 'AutoLink': 14 | return f'''[{node['children'][0]['content']}]''' 15 | if node['type'] == 'BlockCode': 16 | return '\n'.join([f''' {line}''' for line in node['children'][0].split('\n')]) 17 | if node['type'] == 'CodeFence': 18 | return f'''```{node['language']} 19 | {node['children'][0]['content']} 20 | ```''' 21 | if node['type'] == 'Document': 22 | return 
''.join([to_markdown(child) for child in node['children']]) 23 | if node['type'] == 'Emphasis': 24 | return f'''*{node['children'][0]['content']}*''' 25 | if node['type'] == 'EscapeSequence': 26 | return f'''\\{node['children'][0]['content']}''' 27 | if node['type'] == 'Heading': 28 | return ('#' * node['level']) + ' ' + ''.join([to_markdown(child) for child in node['children']]) 29 | if node['type'] == 'Image': 30 | if len(node['title']['children'][0]['content']) > 0: 31 | return f'''![{''.join([to_markdown(child) for child in node['children']])}]({node['src']['children'][0]['content']} "{node['title']['children'][0]['content']}")''' 32 | else: 33 | return f'''![{''.join([to_markdown(child) for child in node['children']])}]({node['src']['children'][0]['content']})''' 34 | if node['type'] == 'InlineCode': 35 | return f'''`{node['children'][0]['content']}`''' 36 | if node['type'] == 'LineBreak': 37 | return '\n' 38 | if node['type'] == 'Link': 39 | if len(node['title']['children'][0]['content']) > 0: 40 | return f'''[{''.join([to_markdown(child) for child in node['children']])}]({node['src']['children'][0]['content']} "{node['title']['children'][0]['content']}")''' 41 | else: 42 | return f'''[{''.join([to_markdown(child) for child in node['children']])}]({node['src']['children'][0]['content']})''' 43 | if node['type'] == 'List': 44 | if node['start'] is not None: 45 | return '\n'.join([f'''{i}. 
def validate_marsha_fn(fn: str, void: bool = False):
    """Validate the structure of a single Marsha function definition.

    Parses the markdown text in *fn* and raises ``Exception`` when the heading,
    description, or (for non-void functions) the trailing usage-example list
    does not match the expected Marsha layout.
    """
    nodes = ast_renderer.get_ast(Document(fn))['children']
    fn_heading = nodes[0]['children'][0]['content']
    # A non-void function must declare a return type after the `):` marker.
    if not void:
        return_type = fn_heading.split('):')[1].strip()
        if not return_type:
            raise Exception(
                f'Invalid Marsha function: Missing return type for `{fn_heading}`.')
    # The heading must be followed by a prose description paragraph.
    if nodes[1]['type'] != 'Paragraph':
        raise Exception(
            f'Invalid Marsha function: Invalid description for `{fn_heading}`.')
    # Non-void functions must end with a bullet list holding at least two examples.
    if not void:
        examples = nodes[-1]
        if examples['type'] != 'List':
            raise Exception(
                f'Invalid Marsha function: Invalid usage examples for `{fn_heading}`.')
        if len(examples['children']) < 2:
            raise Exception(
                f'Invalid Marsha function: Not enough usage examples for `{fn_heading}`.')
    # Re-render everything between the heading and the examples (or up to the
    # end for void functions) and require a minimally descriptive body.
    range_stop = len(nodes) if void else len(nodes) - 1
    fn_desc = ''.join(
        to_markdown(child)
        for i in range(1, range_stop)
        for child in nodes[i]['children'])
    if len(fn_desc) <= 80:  # around a couple of sentences at least
        raise Exception(
            f'Invalid Marsha function: Description for `{fn_heading}` is too short.')


def validate_marsha_type(type: str):
    """Validate the structure of a single Marsha type definition.

    A type is either heading-only (``# type Name file.csv``, three words) or a
    heading plus a CSV-style paragraph with a header row and at least two
    example rows. Raises ``Exception`` on any violation.
    """
    nodes = ast_renderer.get_ast(Document(type))['children']
    type_heading = nodes[0]['children'][0]['content']
    if len(nodes) == 1:
        # Heading-only form: must read `type <Name> <filename>` (three words).
        if len(type_heading.split(' ')) != 3:
            raise Exception(
                f'Invalid Marsha type: Invalid type definition for `{type_heading}`.')
    else:
        if nodes[1]['type'] != 'Paragraph':
            raise Exception(
                f'Invalid Marsha type: Invalid type definition for `{type_heading}`.')
        # Each CSV row renders as a RawText child; we need the header row plus
        # at least a couple of example rows.
        sample_rows = [child for child in nodes[1]['children']
                       if child['type'] == 'RawText']
        if len(sample_rows) <= 2:
            raise Exception(
                f'Invalid Marsha type: Not enough examples for `{type_heading}`.')


def extract_functions_and_types(file: str) -> tuple[list[str], list[str], list[str]]:
    """Split a Marsha document into (functions, types, void functions).

    The document is cut on ``#`` heading markers; each section is classified by
    regex, validated, and re-prefixed with ``# ``. Raises ``Exception`` when the
    document contains no functions or types at all.
    """
    funcs: list[str] = []
    types: list[str] = []
    void_funcs: list[str] = []
    func_regex = r'\s*func [a-zA-Z_][a-zA-Z0-9_]*\(.*\):'
    void_func_regex = r'\s*func [a-zA-Z_][a-zA-Z0-9_]*\(.*\)'
    type_regex = r'\s*type [a-zA-Z_][a-zA-Z0-9_]*\s*[a-zA-Z0-9_\.\/]*'
    for section in file.split('#'):
        block = f'# {section.lstrip()}'
        # A void function matches the parenthesized form but lacks the trailing
        # `:` that introduces a return type.
        if re.match(void_func_regex, section) and not re.match(func_regex, section):
            validate_marsha_fn(block, True)
            void_funcs.append(block)
        elif re.match(func_regex, section):
            validate_marsha_fn(block)
            funcs.append(block)
        elif re.match(type_regex, section):
            validate_marsha_type(block)
            types.append(block)
    if not (funcs or types or void_funcs):
        raise Exception('No functions or types found in file')
    return (funcs, types, void_funcs)
async def process_types(raw_types: list[str], dirname: str) -> list[str]:
    """Resolve file-backed type definitions into inline CSV type blocks.

    For each raw type, if it is declared as ``# type Name file.csv``, read the
    referenced file (relative to *dirname*) and inline its contents under the
    type heading; otherwise keep the definition unchanged. Raises ``Exception``
    when a referenced file cannot be read.
    """
    types_defined = []
    for raw_type in raw_types:
        type_name = extract_type_name(raw_type)
        # If type is defined from a file, read the file
        if is_defined_from_file(raw_type):
            print('Reading type from file...')
            filename = extract_type_filename(raw_type)
            # Bug fix: the path previously hardcoded a placeholder and never
            # used the filename extracted above. os.path.join also avoids
            # producing an absolute '/file' path when dirname is ''.
            full_path = os.path.join(dirname, filename)
            try:
                type_data = read_file(full_path)
            except Exception as e:
                # Chain the original error so debugging keeps the root cause.
                raise Exception(f'Failed to read file: {full_path}') from e
            raw_type = f'''# type {type_name}
{type_data}
'''
        types_defined.append(raw_type)
    return types_defined


def extract_type_name(type):
    """Return the type's name, the second word of its `type Name ...` heading."""
    ast = ast_renderer.get_ast(Document(type))
    if ast['children'][0]['type'] != 'Heading':
        raise Exception('Invalid Marsha type')
    header = ast['children'][0]['children'][0]['content']
    return header.split(' ')[1].strip()


def is_defined_from_file(md):
    """Return True when *md* is a heading-only type of the form `# type Name file`."""
    ast = ast_renderer.get_ast(Document(md))
    if len(ast['children']) != 1:
        return False
    if ast['children'][0]['type'] != 'Heading':
        return False
    header = ast['children'][0]['children'][0]['content']
    # Exactly three space-separated words (`type Name filename`) mark a
    # file-backed type definition.
    return len(header.split(' ')) == 3
def extract_type_filename(md):
    """Return the filename, the third word of a file-backed type heading."""
    heading = ast_renderer.get_ast(Document(md))['children'][0]
    return heading['children'][0]['content'].split(' ')[2]


class MarshaMeta:
    """Parsed metadata for a single Marsha source file."""

    def __init__(self, input_file):
        # Path to the `.mrsh` source; everything else is filled in by populate().
        self.input_file = input_file

    async def populate(self):
        """Read the source file and extract its functions, types, and void functions.

        Returns ``self`` so the call can be chained. ``self.types`` is ``None``
        when the file declares no types.
        """
        source_dir = os.path.dirname(self.input_file)
        self.filename = get_filename_from_path(self.input_file)
        self.content = read_file(self.input_file)
        self.functions, raw_types, self.void_funcs = extract_functions_and_types(
            self.content)
        # Pre-process types in case we need to open a file to get the type definition
        if len(raw_types) > 0:
            self.types = await process_types(raw_types, source_dir)
        else:
            self.types = None
        return self

Describe Logic ⴲ Provide Examples ⴲ Run Reliably

7 | 8 | Marsha is an LLM-based programming language. Describe what you want done with a simple syntax, provide examples of usage, and the Marsha compiler will guide an LLM to produce tested Python software. 9 | 10 | ## Usage 11 | 12 | The Marsha compiler can be used to compile the syntax using a `pip` module via a terminal or Jupyter Notebook: 13 | 14 | ```bash 15 | pip install git+https://github.com/alantech/marsha 16 | python -m marsha data_mangling.mrsh 17 | ``` 18 | 19 | ## Syntax 20 | 21 | The Marsha syntax looks a lot like markdown and is a mixture of English and mathematical notation. It has its own file format `.mrsh` that houses function definition(s). The syntax is subject to change as Marsha is currently in an alpha state. If you have a legitimate use case for Marsha, please let us know. 22 | 23 | ### Data Types 24 | 25 | Data types provide function type safety which helps improve the accuracy of the code generation. The data type format is almost identical to the CSV format. 26 | 27 | ```md 28 | # type EmployeeSkills 29 | name, skill 30 | Bob, math 31 | Jake, spreadsheets 32 | Lisa, coding 33 | Sue, spreadsheets 34 | ``` 35 | 36 | It is also possible for Marsha to infer the data type from CSV file 37 | 38 | ```md 39 | # type EmployeesByDepartment employees_by_department.csv 40 | ``` 41 | 42 | ### Functions 43 | 44 | Functions are the bread and butter of Marsha and can easily define transformations between different data types. There are three sections to a Marsha function: the declaration, the description, and the examples. 45 | 46 | The declaration is a Markdown heading section prefixed with `func`, then followed by a name, parenthesis containing the input type(s), and finally a colon followed by the output type. The name must be a single word, but the types don't need to be classic software types, or even the explicit data types defined above. They can themselves be simple descriptions of what the type is meant to be. 
Eg, 47 | 48 | ```md 49 | # func get_employee_skills(list of EmployeesByDepartment, list of DepartmentSkills): list of EmployeeSkills 50 | ``` 51 | 52 | The next section is the description of the function. Here you explain what the function should do. Being more explicit here will reduce variability in the generated output and improve reliability in behavior, but it's up to you just how explicit you will be and how much you leave to the LLM to figure out. This is similar to declarative languages like SQL and HTML where there are defaults for things you do not specify, like the sort order of `select` statements or the default styling of a `
`. Eg, 53 | 54 | ```md 55 | This function receives a list of EmployeesByDepartment and a list of DepartmentSkills. The function should be able to create a response of EmployeeSkills merging the 2 list by department. Use the pandas library. 56 | ``` 57 | 58 | The final section is the example section. Here you provide examples of calling the function and what its output should be. Marsha uses this to provide more information to the LLM to generate the logic you want, but also uses it to generate a test suite to validate that what it has generated actually does what you want it to. This feedback loop makes Marsha more reliable than directly using the LLM itself. In some ways, this is similar to Constraint-based programming languages where you validate and verify the behavior of your function in the definition of the function itself, but it is also less stringent than those, allowing incomplete constraints where constraint-based languages will fail to compile in the face of that ambiguity. Eg, 59 | 60 | ```md 61 | * get_employee_skills() = throws an error 62 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')]) = throws an error 63 | * get_employee_skills([], []) = [] 64 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')], []) = [] 65 | * get_employee_skills([], [DepartmentSkills('Accounting', 'math')]) = [] 66 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')], [DepartmentSkills('Accounting', 'math')]) = [EmployeeSkills('Joe', 'math')] 67 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting'), EmployeesByDepartment('Jake', 'Engineering')], [DepartmentSkills('Accounting', 'math')]) = [EmployeeSkills('Joe', 'math')] 68 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting'), EmployeesByDepartment('Jake', 'Engineering')], [DepartmentSkills('Accounting', 'math'), DepartmentSkills('Engineering', 'coding')]) = [EmployeeSkills('Joe', 'math'), EmployeeSkills('Jake', 'coding')] 69 | ``` 70 | 71 | 
Altogether this produces: 72 | 73 | ```md 74 | # func get_employee_skills(list of EmployeesByDepartment, list of DepartmentSkills): list of EmployeeSkills 75 | 76 | This function receives a list of EmployeesByDepartment and a list of DepartmentSkills. The function should be able to create a response of EmployeeSkills merging the 2 list by department. Use the pandas library. 77 | 78 | * get_employee_skills() = throws an error 79 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')]) = throws an error 80 | * get_employee_skills([], []) = [] 81 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')], []) = [] 82 | * get_employee_skills([], [DepartmentSkills('Accounting', 'math')]) = [] 83 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting')], [DepartmentSkills('Accounting', 'math')]) = [EmployeeSkills('Joe', 'math')] 84 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting'), EmployeesByDepartment('Jake', 'Engineering')], [DepartmentSkills('Accounting', 'math')]) = [EmployeeSkills('Joe', 'math')] 85 | * get_employee_skills([EmployeesByDepartment('Joe', 'Accounting'), EmployeesByDepartment('Jake', 'Engineering')], [DepartmentSkills('Accounting', 'math'), DepartmentSkills('Engineering', 'coding')]) = [EmployeeSkills('Joe', 'math'), EmployeeSkills('Jake', 'coding')] 86 | ``` 87 | 88 | ### Goals 89 | 90 | The Marsha syntax is meant to be: 91 | - minimal and "obvious", but also discourage lax or incomplete information that could lead to unpredictable behavior 92 | - be mechanically parseable for syntax highlighting and quick feedback on correctness issues to the user 93 | - make it easy to define examples to reduce the probability of generating faulty code and allow generating tests that the application code can be tested against 94 | 95 | ## Compiler 96 | 97 | Marsha is compiled by an LLM into tested software that meets the requirements described, but implementation details can vary greatly across runs much like if 
different developers implemented it for you. There is typically more than one way to write software that fulfills a set of requirements. However, the compiler is best-effort and sometimes it will fail to generate the described program. We aim for 80%+ accuracy on our [examples](./examples/). In general, the more detailed the description and the more examples are provided the more likely the output will work. 98 | 99 | In order to use the compiler, the following environment variables must be set: 100 | 101 | * `OPENAI_ORG` 102 | * `OPENAI_SECRET_KEY` 103 | 104 | Support for other LLMs, including running something locally, is planned but not yet implemented. 105 | 106 | There are also a few flags on how to use Marsha: 107 | 108 | ```sh 109 | $ marsha --help 110 | usage: marsha [-h] [-d] [-q] [-a ATTEMPTS] [-n N_PARALLEL_EXECUTIONS] [--exclude-main-helper] [-s] source 111 | 112 | Marsha AI Compiler 113 | 114 | positional arguments: 115 | source 116 | 117 | options: 118 | -h, --help show this help message and exit 119 | -d, --debug Turn on debug logging 120 | -q, --quick-and-dirty 121 | Code generation with no correction stages run 122 | -a ATTEMPTS, --attempts ATTEMPTS 123 | -n N_PARALLEL_EXECUTIONS, --n-parallel-executions N_PARALLEL_EXECUTIONS 124 | --exclude-main-helper 125 | Skips addition of helper code for running as a script 126 | -s, --stats Save stats and write them to a file 127 | ``` 128 | 129 | * `-d` adds a significant amount of debug information to the screen. Probably not useful if you're not working on Marsha itself. 130 | * `-q` runs only the initial code generation phase without any of the corrective feedback stages. This is significantly cheaper, but more likely to generate code that doesn't quite work. This could be useful if you're using Marsha like Github Copilot or directly asking for code from ChatGPT, but with the Marsha syntax providing some more structure to produce a better result than you might if simply given a blank screen to write into. 
131 | * `-a` The number of times marsha should attempt to compile your program, defaulting to just once. If set to more than 1, on a failure it will try again. For some trickier programs this might improve the ability to get working code at the cost of more LLM calls. 132 | * `-n` The number of parallel LLM threads of "thought" to pursue per attempt. This defaults to 3. When a path succeeds, all of the other paths are cancelled. 133 | * `-s` Save the stats that are printed by default to a file, instead. Probably not useful if you're not working on Marsha itself. 134 | * `--exclude-main-helper` Turns off the automatically generated code to make using your compiled Marsha code from the CLI easier, which is included by default. 135 | 136 | ## Using compiled Marsha code 137 | 138 | By default, Marsha appends logic to the generated Python code to make usage simpler, allowing you to invoke it from the CLI and potentially start a REST server. 139 | 140 | ```sh 141 | $ python -m duckduckgo --help 142 | usage: duckduckgo.py [-h] [-c {BeautifulSoup,duckduckgo}] [-j] [-t] [-i] [-f INFILE] [-o OUTFILE] [-s SERVE] [params ...] 143 | 144 | Marsha-generated CLI options 145 | 146 | positional arguments: 147 | params Arguments to be provided to the function being run. Optimistically converted to simple python types by default, and left as strings if not possible 148 | 149 | options: 150 | -h, --help show this help message and exit 151 | -c {BeautifulSoup,duckduckgo}, --func {BeautifulSoup,duckduckgo} 152 | Specifies the function to call. 
Defaults to the last defined function 153 | -j, --force-json Forces arguments, files, or stdin to be parsed as JSON 154 | -t, --force-text Forces arguments, files, or stdin to be parsed as raw text 155 | -i, --stdin Ignores CLI parameters in favor of stdin (as a single parameter) 156 | -f INFILE, --infile INFILE 157 | Ignores CLI parameters in favor of reading the specified file (as a single parameter) 158 | -o OUTFILE, --outfile OUTFILE 159 | Saves the result to a file instead of stdout 160 | -s SERVE, --serve SERVE 161 | Spins up a simple REST web server on the specified port. When used all other options are ignored 162 | ``` 163 | 164 | * `-c` Lets you choose which function within the generated code you wish to invoke. By default it selects the *last* function defined, as that is usually a "main-like" function. 165 | * `params` are all non-option arguments provided, in order, to the function you are invoking. 166 | * `-j` and `-t` let you choose if the param(s) provided will be parsed as JSON or kept as plain text. By default it will opportunistically parse the arguments but if it fails will keep it as text 167 | * `-i`, `-f`, and `-o` let you choose how input and output is managed. By default inputs are the `params` arguments and the output is to `stdout`, but you can use `-i` to then ignore all `params` and treat `stdin` as the singular input param for your function. Similarly `-f` will do the same, but for the file you specify, and `-o` will write the result to a file you specify instead of to `stdout`. 168 | * `-s` Is a flag to instead run a simple REST server. Using this flag causes it to ignore all other flags. The various function names become `/func_name` endpoints that you can POST to and get a response body back. If you set the `Content-Type` header to `application/json` the input and output will be JSON, if not it will be plain text. 
If your function takes multiple arguments, it *must* be called in JSON mode with the arguments each being an element of a top-level array. 169 | 170 | ## Roadmap 171 | 172 | - Improve average accuracy for our test bed above 90% 173 | - Support for visualizations and data storage (geek mode: handle side-effect logic better in general) 174 | - Syntax highlighting (vim, vscode, etc) 175 | - Support for different types of LLM 176 | - Bootstrap the Marsha compiler with a Marsha program 177 | - More target languages other than Python 178 | - A module system 179 | - Edits to Marsha mutating existing Python code instead of regenerating 180 | - "Decompiler" from source code into Marsha syntax 181 | - "Debugger" meta mode to take existing Marsha definition and an example of an unexpected failure and recommend what to update with the Marsha definition. 182 | - Optimization "levels" (spend more time on more iterations with the LLM improving performance, security, etc) 183 | - Marsha GUI mode: visual editor baked into the compiler (eventually with the decompiler/debugger/etc features), and able to generate a GUI wrapper for generated code, enabling end-to-end non-terminal usage 184 | - Better support for a mixed environment (Marsha functions can be used by Python, but how to get Marsha to use hand-written Python functions) 185 | - Better "web scraping" behavior (LLM likes to assume the internet still looks like it did in November 2021, but HTML structure has often changed for the largest websites; automatically correcting that assumption would be nice) -------------------------------------------------------------------------------- /rfcs/001 - Marsha Syntax.md: -------------------------------------------------------------------------------- 1 | # 001 - Marsha Syntax 2 | 3 | ## Current Status 4 | 5 | ### Proposed 6 | 7 | 2023-05-25 8 | 9 | ### Accepted 10 | 11 | 2023-05-31 12 | 13 | #### Approvers 14 | 15 | - Luis de Pombo 16 | - Alejandro Guillen 17 | 18 | ### Implementation 19
| 20 | - [ ] Implemented: [One or more PRs](https://github.com/alantech/marsha/some-pr-link-here) YYYY-MM-DD 21 | - [ ] Revoked/Superceded by: [RFC ###](./000 - RFC Template.md) YYYY-MM-DD 22 | 23 | ## Author(s) 24 | 25 | - David Ellis 26 | 27 | ## Summary 28 | 29 | Marsha as a higher-level language is going to need a syntax. This syntax should be: 30 | - minimal, "obvious", and discourage lax or incomplete information that could lead to unpredictable behavior 31 | - be mechanically parseable for syntax highlighting and quick feedback on correctness issues to the user 32 | - make it easy to define examples and set different tolerance levels that will fail to compile if not enough examples are provided to reduce the probability of generating faulty code and provide the foundation for a test harness/suite itself 33 | 34 | For now, only function and data structure definitions are being considered. What other elements to create will depend on the initial target audience we intend to tackle, and since we're still debating that, we'll update this document once we're ready. 35 | 36 | Following the meta RFC, the functions will be a mixture of declarative and constraint style function definitions, and will have the following five parts: the function name, input arguments, return type, description, and examples of its usage. These should be enough for the LLM to do a pretty solid job at writing the actual code and the tests cases for us. 37 | 38 | ## Proposal 39 | 40 | ### Function Syntax Proposal 41 | 42 | For the function syntax, we're going with something that is brief, allows for fuzzy, ambiguous definition if desired, but has enough "hooks" to make parsing of the 5 component pieces unambiguous, along with the ability to switch to an unambiguous type definition if needed by the user. 43 | 44 | ```md 45 | # func fibonacci(integer): integer in the set of fibonacci numbers 46 | This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 
47 | 48 | fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 49 | * fibonacci(1) = 1 50 | * fibonacci(2) = 1 51 | * fibonacci(3) = 2 52 | * fibonacci(0) throws an error 53 | ``` 54 | 55 | A function block begins with `# func ` and is then followed by a math-y function declaration of `function_name(type1, type2, ...): return_type` 56 | 57 | Only input and output types are provided, *not* any argument names. Since this language is *not* imperative, the examples will never explicitly label the input arguments. The description *can* name the argument, like is done in this example, but there is no explicit requirement to do so and many functions likely won't need to given the context from the example function calls. 58 | 59 | The types can be anything you want, but if the type is a single word, it would be checked against the list of user-defined types to potentially include in the function generation prompt for better context. 60 | 61 | #### Data Type Syntax Proposal 62 | 63 | For "Data Type" we are only considering struct-style types where there's named properties with their own sub-types. "Base" types, like integers, strings, booleans, etc, will just be implicit and handled by the LLM, and we won't model tuple types as you can just represent them as struct types with property names 1, 2, 3, etc. 64 | 65 | The most common form of struct type that most non-developer technical users are aware of is the table type, like in SQL, or also known as a spreadsheet with column labels in Excel or as a CSV file. The table can be represented as either an array of structs (row-oriented) or a struct of arrays (column-oriented), though the latter representation is generally only used for certain performance-centered contexts and the row-oriented representation is the "normal" one (that also more closely matches a struct syntax). 
66 | 67 | With this in mind, we can make the data type syntax as close as possible to a snippet of a CSV to improve the ease of defining the type for non-developers: 68 | 69 | ```md 70 | # type SKU 71 | brand_id, sku_id, color, type 72 | 20, 10040, 'red', 'shirt' 73 | 50, 10059, 'blue', 'shirt' 74 | ``` 75 | 76 | The beginning of the syntax starts with `# type ` followed by a single word specifying the name of the type. 77 | 78 | After that are the first few lines of a CSV file, with the first row being the column headers that define the struct property names, and the following rows some example data. 79 | 80 | We considered adding a reference syntax to the function definition to allow the examples to directly use one of the example values from the data type definition as an input or output type, but references (eg, pointers) are hard for non-developers (or even junior developers), and the closest existing example to this concept [YAML anchors](https://medium.com/@kinghuang/docker-compose-anchors-aliases-extensions-a1e4105d70bd), are uncomfortable to most developers, too, so we dropped the concept (for now, at least). 81 | 82 | Regardless of if we had the reference syntax, though, there needs to be some way for users to define a custom struct record that is passed as an input argument or returned as the output type from a function in the examples. 
Because of LLMs, this doesn't have to be strictly enforced (anything programming language "like" ought to work, to varying degrees of reliability), but we probably should recommend using the syntax of the target language when possible, so for Python that would look like calling a constructor function for a class: 83 | 84 | ``` 85 | SKU(20, 10040, 'red', 'shirt') 86 | ``` 87 | 88 | (We may want to provide these types to the LLM as Python classes when generating Python code, and this syntax is simple enough that we should be able to do so with basic coding, not needing an LLM in the loop for it, but that is an implementation detail we will decide on in the actual code.) 89 | 90 | We have also dropped explicit typing of the sub-types for a user-defined type to instead rely on the LLM to infer the type from the examples. We may add back that in the future, but for the sake of speed of release and reduced scope for the initial version we cut it for now. 91 | 92 | ### Data Syntax Alternatives Considered 93 | 94 | #### Pure CSV Syntax with Types Row 95 | 96 | ```md 97 | # type SKU 98 | brand_id, sku_id, color, type 99 | integer, integer, string that is a valid color name, string that is name of article of clothing 100 | 20, 10040, 'red', 'shirt' 101 | 50, 10059, 'blue', 'shirt' 102 | ``` 103 | 104 | #### Pure CSV Syntax with Optional Types 105 | 106 | ```md 107 | # type SKU 108 | brand_id: integer, sku_id: integer, color: string that is a valid color name, type: string that is name of article of clothing 109 | 20, 10040, 'red', 'shirt' 110 | 50, 10059, 'blue', 'shirt' 111 | 112 | sku_from_new_brands(SKU[sku_id=10040]) 113 | ``` 114 | 115 | #### Numbered Constructor Syntax with Optional Types 116 | 117 | ```md 118 | # type SKU(brand_id: integer, sku_id: integer, color: string that is a valid color name, type: string that is name of article of clothing) 119 | 1. SKU(20, 10040, 'red', 'shirt') 120 | 2. 
SKU(50, 10059, 'blue', 'shirt') 121 | ``` 122 | 123 | All the numbered syntax examples can be referenced in the function examples using `#`: 124 | 125 | ```md 126 | # sku_from_new_brand(SKU[], Brand[]) = SKU[] 127 | 128 | - sku_from_new_brand([SKU#1], ...) = SKU#1 129 | ``` 130 | 131 | #### Numbered CSV-like Syntax with Optional Types 132 | 133 | ```md 134 | # type SKU 135 | brand_id: integer, sku_id: integer, color: string that is a valid color name, type: string that is name of article of clothing 136 | 1. 20, 10040, 'red', 'shirt' 137 | 2. 50, 10059, 'blue', 'shirt' 138 | ``` 139 | 140 | #### Numbered, CSV-like Syntax with Optional Types 141 | 142 | ```md 143 | # type SKU(brand_id: integer, sku_id: integer, color: string that is a valid color name, type: string that is name of article of clothing) 144 | 1. 20, 10040, 'red', 'shirt' 145 | 2. 50, 10059, 'blue', 'shirt' 146 | ``` 147 | 148 | #### Numbered, TS-like Syntax with Optional Types 149 | 150 | ```md 151 | # type SKU(brand_id: integer, sku_id: integer, color: string that is a valid color name, type: string that is name of article of clothing) 152 | 1. SKU { 20, 10040, 'red', 'shirt' } 153 | 2. SKU {50, 10059, 'blue', 'shirt' } 154 | ``` 155 | 156 | ### Function Syntax Alternatives Considered 157 | 158 | #### Markdown-like Syntax 159 | 160 | A markdown-based syntax has several advantages in that you can just render the markdown and get something easily readable in a [literate programming](https://en.wikipedia.org/wiki/Literate_programming) style. It also provides some hints about blocks and ordering without needing to explicitly close blocks with curly braces or indent blocks like Python. 161 | 162 | There are a few possibilities here on how to structure it, more verbosely and English-like, and more compact and math-like. It should be trivial to transform one into the other so we *could* also decide to support both styles, but it's probably best to choose one, or some minor blend of the two for clarity. 
163 | 164 | ##### Verbose-style 165 | 166 | ```md 167 | # Function fibonacci 168 | 169 | ## Inputs 170 | 171 | 1. n is an integer 172 | 173 | ## Output 174 | 175 | An integer within the fibonacci set 176 | 177 | ## Description 178 | 179 | This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 180 | 181 | fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 182 | 183 | ## Examples 184 | 185 | * fibonacci(1) = 1 186 | * fibonacci(2) = 1 187 | * fibonacci(3) = 2 188 | * fibonacci(0) throws an error 189 | ``` 190 | 191 | ##### Math-style 192 | 193 | ```md 194 | # fibonacci(n: int): int 195 | 196 | This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 197 | 198 | fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 199 | 200 | * fibonacci(1) = 1 201 | * fibonacci(2) = 1 202 | * fibonacci(3) = 2 203 | * fibonacci(0) throws an error 204 | ``` 205 | 206 | The math-like form compacts the first three sections together and doesn't need to specify that it's a function since it follows a function syntax. Further, the examples at the end don't need an explicit subsection in this form because it is unambiguous that the first part without anything is the description and the bullet-point list are the examples. The more verbose form uses an enumerated list for the input arguments so argument order is explicit. The math-like form following a Typescript-like type system probably makes the most sense, but could also go with C style `int fibonacci(int n)` if we wanted to. It may even be the case that the LLM will be "fine" with either of them, or spelling out `integer` or `32-bit integer` and not have a problem to translate that into the target language of choice. Being explicit here may not actually be necessary, but if/when we have more than just functions, that may force us to choose something precise here so we can distinguish function blocks from other blocks. 
The verbose version replaces `:` with `is a(n)` so `n: int` became `n is an integer`. 207 | 208 | #### "Notepad"/"Word" syntax 209 | 210 | Markdown syntax does still require some syntax to learn, even if it is minor. A "pure english text" approach is another alternative, with the only "syntax" being a colon. 211 | 212 | ``` 213 | Function: fibonacci 214 | Inputs: n is an integer 215 | Output: An integer within the fibonacci set 216 | Description: This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 217 | fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 218 | Examples: 219 | fibonacci(1) = 1 220 | fibonacci(2) = 1 221 | fibonacci(3) = 2 222 | fibonacci(0) throws an error 223 | End Function 224 | ``` 225 | 226 | This syntax feels like it needs an explicit "End Function" declaration. It *probably* could be optional, but it might be easier on humans reading it to be there. In this syntax `Function: `, `Inputs: `, `Output: `, `Description: `, `Examples: `, and `End Function` are keywords if and only if they are at the very beginning of the line. They must also be in the listed order so there is some syntax highlighting and error reporting possible. The `Inputs` and `Examples` sections expect each input or example to be on a separate line. You can immediately use the same line that the keyword is located on, or you can start on the next line, whichever is easier. This syntax can/should ignore blank lines and it can consider the remainder of the line after the keyword as a blank line to ignore, so you can also "format" the example for greater clarity: 227 | 228 | ``` 229 | Function: fibonacci 230 | 231 | Inputs: n is an integer 232 | 233 | Output: An integer within the fibonacci set 234 | 235 | Description: 236 | 237 | This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 
238 | 239 | fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 240 | 241 | Examples: 242 | 243 | fibonacci(1) = 1 244 | fibonacci(2) = 1 245 | fibonacci(3) = 2 246 | 247 | fibonacci(0) throws an error 248 | 249 | End Function 250 | ``` 251 | 252 | This "syntax" has the advantage of incredibly bare-bones plain text and could reasonably be written by someone used to using Microsoft Word, for instance, which may have some value. It can still be syntax highlighted with a custom highlighter, but is still legible without one. 253 | 254 | #### XML-based syntax 255 | 256 | Tons of people were able to make websites back in the late 90s. The HTML syntax was simple enough at the time that hand editing it was okay, and people at least tolerated the verbosity. XML is the generalization and simplification of that HTML syntax, and so it could similarly be used here. 257 | 258 | ```xml 259 | 260 | 261 | 262 | 263 | 264 | 265 | This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 266 | 267 | fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 268 | 269 | 270 | fibonacci(1) = 1 271 | fibonacci(2) = 1 272 | fibonacci(3) = 2 273 | fibonacci(0) throws an error 274 | 275 | 276 | ``` 277 | 278 | This syntax is super-easy to parse, syntax highlight, people have in the past tolerated it, and there's no ambiguity issue as the language is extended to handle more than just functions, but it is the most verbose option possible, and you probably need an editor to help you write it correctly. There's also the ambiguity surrounding properties on tags versus nested tags. Eg, could `` also be written `fibonacci`? Why or why not? 279 | 280 | #### JSON/YAML/TOML syntaxes 281 | 282 | The XML-based syntax above shows that we could similarly format this with any other data interchange format. Not going to make examples for these right now because I think they're a bad idea (and I'm low on time before the next meeting), but definitely possible. 
283 | 284 | #### SQL-inspired syntax 285 | 286 | One of the most successful languages for non-developers is SQL, and many of the "software engineer adjacent" roles that we think this language could lower the barrier to entry for often know SQL, so we could use that as the base of our language. 287 | 288 | ``` 289 | FUNCTION fibonacci (n int) RETURNS an integer within the fibonacci set 290 | DESCRIPTION 291 | This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 292 | 293 | fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 294 | EXAMPLES ( 295 | fibonacci(1) = 1, 296 | fibonacci(2) = 1, 297 | fibonacci(3) = 2, 298 | fibonacci(0) throws an error 299 | ); 300 | ``` 301 | 302 | Examples become comma-delimited within parentheses, and the keywords `FUNCTION`, `RETURNS`, `DESCRIPTION`, and `EXAMPLES` must be provided in order, and presumably are case-insensitive. The indentations are trimmed from the text and are ignored by the language. This does mean that if you need to use the word `EXAMPLES` inside of your `DESCRIPTION` block, there would need to be some special escape mechanism (similarly in the function name, input arguments, and return type blocks, but less likely to occur there). We could similarly follow the SQL standard here and require single quotes around these keywords when used as not-a-keyword, like `'examples'`, but it may not be immediately obvious to someone new when they read it why that is the case. 303 | 304 | #### C-inspired syntax 305 | 306 | Including it anyways, because it should totally work, though I don't know if this is the best of ideas 307 | 308 | 309 | ```c 310 | function fibonacci(int n) { 311 | /** 312 | * This function calculates the nth fibonacci number, where n is provided to it and starts with 1. 
313 | * 314 | * fibonacci(n) = fibonacci(n - 1) + fibonacci(n - 2) 315 | **/ 316 | assert fibonacci(1) = 1; 317 | assert fibonacci(2) = 1; 318 | assert fibonacci(3) = 2; 319 | assert fibonacci(0) throws an error; 320 | return an integer within the fibonacci set; 321 | } 322 | ``` 323 | 324 | This looks more "code-like" and has the keywords `function`, `assert`, and `return`, using a "flowerbox" style comment section for the description (but would probably also support non-flowerbox style and lines of one-liner `//` comments, too), and uses curly braces for block scope and semi-colons for statement separators. It would be more familiar for developers, but also anyone who has taken at least an intro to programming course, so that may still be fine? 325 | 326 | ## Expected Semver Impact 327 | 328 | If we were at a 1.0.0+ version (somehow before anything exists) this would be a major version bump ;) 329 | 330 | ## Affected Components 331 | 332 | Everything 333 | 334 | ## Expected Timeline 335 | 336 | An RFC proposal should define the set of work that needs to be done, in what order, and with an expected level of effort and turnaround time necessary. *No* multi-stage work proposal should leave the engine in a non-functioning state. 
337 | -------------------------------------------------------------------------------- /rfcs/002 - Compiler Refactor RFC.md: -------------------------------------------------------------------------------- 1 | # 002 - Compiler Refactor RFC 2 | 3 | ## Current Status 4 | 5 | ### Proposed 6 | 7 | 2023-08-12 8 | 9 | ### Accepted 10 | 11 | 2023-08-15 12 | 13 | #### Approvers 14 | 15 | - Luis de Pombo 16 | - Alejandro Guillen 17 | 18 | ### Implementation 19 | 20 | - [ ] Implemented: [One or more PRs](https://github.com/alantech/marsha/some-pr-link-here) YYYY-MM-DD 21 | - [ ] Revoked/Superceded by: [RFC ###](./000 - RFC Template.md) YYYY-MM-DD 22 | 23 | ## Author(s) 24 | 25 | - David Ellis 26 | 27 | ## Summary 28 | 29 | Multiple concurrent efforts to move Marsha forward (testing side-effect functions, special syntax for databases, using Llama V2 or WizardCoder for local LLM support) have been stymied by requiring very branchy code to implement on our simple fixed 3-stage parse-transform-emit compiler. 30 | 31 | We either need to be very sure where we want to take Marsha to continue a fixed pipeline, or we need to consider alternative compiler structures. 32 | 33 | ## Proposal 34 | 35 | #### How LLMs are like compilers 36 | 37 | Simply put: compilers take some input (configuration flags, code, etc), parse it into an AST that can then be manipulated to produce some output in another format. LLMs take some input (SYSTEM directives, user requests, etc) parse it into tokens that can then be manipulated to produce a new output. 38 | 39 | #### How LLMs are *not* like compilers 40 | 41 | Compilers, when given the same input to the same compiler code, produces identical output. It may be difficult to get it to produce exactly the assembly instructions you're looking for, but once you find that magical incantation, it will continue to work. LLMs are not deterministic in that way. They will produce different output for the same input(s). 
Further, some inputs may produce an output that works sometimes and doesn't work other times. 42 | 43 | #### What Marsha currently brings to the table 44 | 45 | Marsha has three stages of operation, but those three stages together accomplish a singular task: make it so output from the LLM works as reliably as the test suite you provide it, or block the output entirely, instead of providing misleading output that doesn't work. This operates on the principle that verifying the output is more reliable than generating the output. (Which has precedent in things like the Sieve of Eratosthenes.) When the first stage fails to parse, it immediately returns an error message informing the user of the parse error. But when a verification fails in the second or third stage, it executes a different LLM call in a loop to attempt to get the LLM to correct itself. (The first stage also has an LLM-based sanity check that even syntactically correct requests are "reasonable", but it doesn't loop, so it's not very relevant to this discussion.) Once it gets through all three stages it has working code (and a working test suite) that we then bolt helper logic to make CLI and HTTP based usage easier. 46 | 47 | When it's broken down like that, we notice two basic kinds of operations: 48 | 49 | 1. `input -> transform -> output or error` 50 | 2. `input -> transform -> check and maybe retry -> output or error` 51 | 52 | The fact that a transform or check is powered by an LLM or "regular" code is just an implementation detail. 53 | 54 | A cross-cutting concern is the actual features of the language implemented within the entire flow. The entire flow is, roughly: 55 | 56 | 1. input 57 | 2. parse AST or error 58 | 3. transform AST to more verbose markdown or error 59 | 4. sanity check with LLM or "error" (on error, run a second LLM to try to give hints why it failed the sanity check) 60 | 5. 
prompt LLM to generate all functions and classes based on output from (3) or error (only if LLM unreachable), generating 3 different files by default to improve reliability for a greater expense 61 | 6. prompt LLM to generate test suite for all functions based on output from (3) or error (can be done in parallel when using ChatGPT), generating 3 different files by default to improve reliability for a greater expense 62 | 7. take pairs of output from (5) and (6) and verify they are valid markdown with the expected sections only, returning the pair(s) that work, or erroring if none do 63 | 8. take each pair of outputs from (5) and (6) and run them through a Python linter to confirm they are valid python (most stylistic rules removed, just syntax, unused imports, and a few others), if it errors it sends all of this to an LLM to generate new versions of the file, repeating on error until a limit is reached, then exiting the flow for that particular pair, erroring if all pairs fail. 64 | 9. take each pair of outputs from (8) and runs the test suite, passing them and the test suite errors to an LLM if they fail, otherwise continuing. If it continues to fail for too long, the particular pair exits the flow, if all pairs exit the flow, it errors. 65 | 10. the first pair to finish (9) cancels the rest of the operations, then by default it appends the CLI and HTTP server helper logic to the code file and exits. 
66 | 67 | For now, *most* of the Marsha language functionality exists within (3), and it is a synchronous operation, but most of the solutions for database support would at minimum require (3) to become async and involve various DB and ML operations (likely: connect to the DB and generate a schema dump, then chunk that dump and index it with vectors, then use vector search to find the most relevant table(s), stored procedure(s), etc to provide to (5) and (6)) but it's possible these features could "bleed" into the validation steps of Marsha, and the codebase starts turning (even more) into a big ball of mud. (For instance, validating the generated SQL DB access code in (5) against the actual DB schema in (7) seems likely). 68 | 69 | There are other dimensions we would like to extend Marsha on: 70 | 71 | * Which LLM is used to do the work. 72 | * Which target language Marsha compiles to. 73 | * Dev tools (debugging, decompiling, etc) for Marsha. 74 | 75 | The solution we come up with needs to keep the complexity of all of these different features from compounding on one another, ideally. 76 | 77 | #### Proposal: Marsha as Extensions that append to lists of layers 78 | 79 | Taking a very rough inspiration from LLMs themselves, we'll have a singular kind of "mapper" that is the foundational piece of the compiler, and they will be organized into layers where each layer executes in parallel and their outputs are fed into the next layer, etc. Each logical feature will be an extension that concatenates its logic into the appropriate layers, with the "meaning" of said layers being a convention per extension "group". 80 | 81 | Now working backwards, an Extension Group is a collection of extensions that all declare membership of a particular group name. 
Extensions won't be allowed to mix between groups to reduce the complexity involved, and because an Extension Group declares how many layers exist and their input and output formats (most layers will likely be lists of some class type, but the input for the first layer will be `None` and use the quasi-global configuration data to set itself up and the output of the last layer can only be `None` or `str`). 82 | 83 | A new extension that wishes to insert a special layer between existing layers would have to fork the entire extension group project, which seems wasteful, but as long as the layer definitions are "cheap" (simple and easy to read), just forking an extension group under a new name shouldn't be too big of a burden. 84 | 85 | The "mapper" logic in each layer will consist of just the #2 operation type listed above: `input -> transform -> check and maybe retry -> output or error`, as the third step becomes a simple no-op in other cases, it could have a default `f(a) = a`-type implementation and made an optional element of that class. 86 | 87 | Yes, class. The use of the word "mapper" here could be confusing, so this part is definitely subject to change, but there'd be a `BaseMapper` class that must be extended and where you must implement the `transform` method, but the `check` method could use the default implementation. Both of these methods are `async` so they can do whatever you want within them. 88 | 89 | And you could always Bring-Your-Own-LLM, but it would probably be best if the Marsha project itself maintained a collection of `*Mapper` classes that automatically make LLMs like ChatGPT, LLamaV2, WizardCoder, etc, and other useful tools like Markdown AST, Python venv projects, etc, easily defineable with as few lines of code as possible. 
Each mapper is given the entirety of the prior layer's output, which will usually be a list of outputs when there are multiple mappers in a layer, but would be without the list wrapper if there is only one mapper in that layer (so for instance, loading the `*.mrsh` file could be a mapper in its own layer all by itself that loads and parses the AST, and then feeds that AST root node to the next layer that does the code and test LLM operations in parallel for ChatGPT, while Llama V2 locally could make that a singular mapper so they run sequentially). 90 | 91 | Even though the mappers run in parallel, we'll use asyncio's `gather` mechanism to make sure they stay in the "expected" order. 92 | 93 | You could conceivably just have the Extension Group directly place the mappers in the "right" places, but that means there'd be no way for a user to add or remove "optional" or "recommended" extensions, so there'd be zero configurability for them (and therefore *all* experimentation with how Marsha should work would require forking the extension group every time). If instead there are "required", "recommended", and "optional" extensions within an extension group, the user could manually disable recommended extensions or manually enable optional extensions to configure the behavior. Then the Extension Group defines the order in which each extension appends its mappers to the different layers, and each Extension defines which layers each transformer belongs to. This should allow experimentation with new syntax that is incompatible with existing syntax by marking the incompatible syntax a recommended extension and the new syntax an optional extension, and a savvy user could swap them out if desired. 
94 | 95 | These concepts would be baked into `Extension` and `ExtensionGroup` classes that would force the desired organization onto the code, and they, along with the `*Mapper`s could be put together into a Python module (or just a singular `*.py` file) and then [dynamically imported by Marsha](https://docs.python.org/3/library/importlib.html#module-importlib) so they could live outside of the codebase. 96 | 97 | If the user does not specify an extension group, it would be assumed to use the default extension group that will be baked into Marsha (or later, if/when we add decompilation/debugging/etc functionality, it would depend on the subcommand called, like `marsha compile ...`, `marsha decompile ...`, `marsha debug ...`, etc). Similarly, if no extension manipulation is requested by the user, the required and recommended extensions would be loaded (otherwise, the set of extensions to load will be modified based on the inclusion/exclusion lists). 98 | 99 | This gives us a fairly flexible control over how Marsha works and how it can be developed into the future, while also giving us a fractal-like organization of the code in question, making it easier to understand and maintain while keeping the code reasonably DRY and efficient. 100 | 101 | ### Alternatives Considered 102 | 103 | #### Marsha as fully undirected graph of mappers 104 | 105 | Here, each mapper simply declares a named source for its input and a named output destination, with `START` and `END` being special nodes. This is very similar in spirit to [queue-flow](https://dfellis.github.io/queue-flow/) that I wrote so many years ago, and could even handle multiple nodes reaching `END` at different times by using introspection on the event loop to decide when to actually quit. 
106 | 107 | It was rejected because while you can write pretty succinct code that efficiently handles sync or async functions, the named queues make it difficult/impossible to have runtime-configurable extension configuration with it, and it doesn't help the logical grouping of syntactic elements that are spread across multiple mappers throughout the graph, so readability would only be marginally improved. (That last part was pretty general to queue-flow -- I tended to get "write-only" code out of it that was fast, efficient, and near impossible for other developers to read, because the connections between the named queues were often spread across files and hard to follow.) 108 | 109 | #### Marsha as database and trigger-transformers 110 | 111 | There's been work on turning compilers into specialized databases, particularly for type inference or langserver use-cases where partial compilation of known data and multiple passes makes a lot of sense, and being able to easily query for things "other parts" of the compiler have "figured out" in the meantime can allow improvements in function generation in secondary passes, or simply allow compilation to complete at all for said functions. It's a very different way of thinking about how compilation works and definitely has some merit, but seems very similar functionally to the queue-flow-like approach described above: each registered operation would have to register what data it needs to do its work and then get triggered when the required data is there. 
This makes an operation that depends on multiple sources of different data easier to implement than the queue-flow approach (but is doable with that approach with a reducer-transformer function that both sources push to and it only pushes to its output queue once all expected data sources have arrived), but it has the exact same problem that the actual computational flow is very difficult to reason about, though it is easier to update a singular file and recompile just it and only parts of other files that are impacted, for instance, instead of having to start the world over again, so the advantage for language servers is there. 112 | 113 | But the increased maintenance burden *plus* the LLM latency problem (such that sub-100ms response times for IDE-langserver integration to be meaningful is impossible) means we don't want to try for a language server for quite a while, if ever. So this approach is also rejected. It also suffers from actual language extensions being exceedingly difficult to do in this approach, because you would have to modify each transformer's query to adjust which inputs it consumes, which means it's an abstraction that doesn't fit if that's desirable. (Usually, extensible parts of the language go into the standard library and the syntax itself is not really extensible, just the set of symbols you're working with. I do wonder if/how Raku implements a lang server?) 
It is also possible to accidentally put two incompatible transformers next to each other when another order of the same transformers is fine, but there would be no warning to you until you tried and it failed. Finally, it's impossible to run some transformers in parallel even if they really don't depend on each other, so this approach would reduce our current compiler performance noticeably. 118 | 119 | #### Keep Hacking 120 | 121 | Do nothing to change the way we're doing things and just hack the features on at will. The complexity of the codebase would temporarily go up while we're figuring things out, but would presumably drop down some again in the future once certain behaviors are determined irrelevant or are merged into more generalized ones. We could presumably keep going on this for a bit, but competing features would have to live on long-lived feature branches while being figured out. Because the reliability of an LLM-based transform is lower than a traditional compiler transform, some language ideas may look good on paper but can't actually work with the LLMs of today, at least, possibly never, and determining that ahead of time is sometimes frustratingly impossible, so this approach is rejected as putting too high of a maintenance burden on us, as well as keeping external contribution lower. It also never allows Marsha-the-OSS-project to be considered a separate thing from Marsha-the-AI-language. This means things like the decompiler, debugger, etc, would likely need to be parallel projects (or at least the various Marsha-related things would be wholly-separate sub-modules within a parent module and little to do with each other architecturally). 122 | 123 | Finally, this approach feels antithetical to the bootstrapping goal. 
The proposed solution could conceivably have the transformers, extensions, and extension groups generated by Marsha itself, and the "core" of Marsha (the various classes defined) could be converted piecemeal, too, until it's all Marsha code. Handwritten, tightly-coupled compiler logic doesn't seem as amenable to that sort of rewrite. 124 | 125 | ## Expected Semver Impact 126 | 127 | The language itself would have zero user-facing changes, so that would be a patch version change, but Marsha as a more generalized tool beyond Marsha the AI language might imply it's a major version bump. Hard to decide. 128 | 129 | ## Affected Components 130 | 131 | Absolutely everything in the codebase, but we could probably do this piecemeal, first reorganizing the three stages into transformers, and then writing the extension and extension group classes and handling logic and dropping the fixed pipeline later. 132 | 133 | ## Expected Timeline 134 | 135 | 1. Create `BaseMapper`, `ChatGPTMapper`, `MarkdownASTMapper`, etc classes 136 | 2. Rewrite the Marsha stages with these classes. 137 | 3. Create the `Extension` and `ExtensionGroup` classes (and/or better name for `ExtensionGroup`?) 138 | 4. Rewrite the pipeline to use these classes with a hardwired instantiation. (Temporarily dropping `--quick-and-dirty` and potentially other flags) 139 | 5. Convert the hardwired instantiation into a default that is used if the user doesn't provide an `ExtensionGroup` and extension configuration options, but use the specified `ExtensionGroup` if defined. 140 | 6. Restore `--quick-and-dirty` (and potentially others) as an `ExtensionGroup` 141 | 7. Start implementing `Llama2Mapper`, `WizardCoderMapper`, etc for some experiments, DB-specific ones for others, etc. 
142 | -------------------------------------------------------------------------------- /marsha/llm.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from asyncio.subprocess import Process 3 | import os 4 | import platform 5 | import time 6 | import traceback 7 | import shutil 8 | import subprocess 9 | import sys 10 | 11 | from pylama.main import parse_options, check_paths, DEFAULT_FORMAT 12 | 13 | from marsha.meta import MarshaMeta 14 | from marsha.parse import validate_first_stage_markdown, validate_second_stage_markdown, write_files_from_markdown, format_marsha_for_llm, extract_func_name 15 | from marsha.stats import stats 16 | from marsha.utils import read_file, autoformat_files, prettify_time_delta 17 | from marsha.mappers.chatgpt import ChatGPTMapper 18 | 19 | # PyInstaller creates a temp folder and stores path in _MEIPASS 20 | base_path = '.' 21 | if hasattr(sys, '_MEIPASS'): 22 | base_path = sys._MEIPASS 23 | 24 | # Determine what name the user's `python` executable is (`python` or `python3`) 25 | python = 'python' if shutil.which('python') is not None else 'python3' 26 | if shutil.which(python) is None: 27 | raise Exception('Python not found') 28 | 29 | 30 | async def gpt_can_func_python(meta: MarshaMeta, n_results: int): 31 | gpt_can_func = ChatGPTMapper('''You are a senior software engineer reviewing an assignment to write a Python 3 function. 32 | The assignment is written in markdown format. 33 | It should include sections on the function name, inputs, outputs, a description of what it should do, and some examples of how it should be used. 34 | You are assessing if this document has enough context such that a junior software engineer with a couple of years of experience should be able to write the desired function and a test suite to verify it. 35 | The description must be precise enough to determine what to do. 36 | The examples must be complete enough to likely catch all edge cases. 
37 | If the description and examples are broad enough that different engineers could reasonably create very different functions that supposedly meet the requirements but do different things, that is another reason to reject this assignment. 38 | Your answer is consumed by project management software, so only respond with Y for yes or N for no. 39 | ''', max_tokens=1, n_results=n_results, stats_stage='first_stage') 40 | marsha_for_code_llm = format_marsha_for_llm(meta) 41 | gpt_opinions = await gpt_can_func.run(marsha_for_code_llm) 42 | if any([True if opinion == 'N' else False for opinion in gpt_opinions]): 43 | return False 44 | return True 45 | 46 | 47 | gpt_improve = ChatGPTMapper('''You are a senior software engineer reviewing an assignment to write a Python 3 function that a junior software engineer has written. 48 | The assignment is written in markdown format. 49 | It includes sections on the function name, inputs, outputs, a description of what it should do, and some examples of how it should be used. 50 | You have already decided this document is not written well enough such that another engineer can reliably write a working function that meets expectations, nor a test suite to verify proper functionality. 51 | The description must be precise enough to determine what to do. 52 | The examples must be complete enough to likely catch all edge cases. 53 | You are writing a few paragraphs gently explaining the deficiencies in the task definition they have written, not coming up with examples assuming what they might have wanted, since that isn't clear in the first place, just why what they have provided is not precise enough. 54 | In your response do not refer to the person at all or tell them what mistakes "they" have made. This is a blameless culture. The mistakes simply are, and that they made them isn't a problem, just that they should learn from them. 
55 | Do not include a "hello" or a "regards", etc, as your response is being attached to a code review system. 56 | ''', stats_stage='first_stage') 57 | 58 | 59 | async def gpt_improve_func(meta: MarshaMeta): 60 | marsha_for_code_llm = format_marsha_for_llm(meta) 61 | improvements = await gpt_improve.run(marsha_for_code_llm) 62 | print(improvements) 63 | 64 | 65 | async def gpt_func_to_python(meta: MarshaMeta, n_results: int, retries: int = 3, debug: bool = False): 66 | marsha_for_code_llm = format_marsha_for_llm(meta) 67 | gpt_gen_code = ChatGPTMapper(f'''You are a senior software engineer assigned to write Python 3 functions. 68 | The assignment is written in markdown format. 69 | The description of each function should be included as a docstring. 70 | Add type hints if feasible. 71 | The filename should exactly match the name `{meta.filename}.py`. 72 | Make sure to follow PEP8 guidelines. 73 | Make sure to include all needed standard Python libraries imports. 74 | Generate `requirements.txt` file with all needed dependencies, do not add fixed version to dependencies. 75 | If need to convert `type` to Python classes, you will receive a markdown where the heading is the class name followed by several rows following a comma separated CSV format where the first row contains all class properties and the following rows contain examples of the values of those properties. Make sure to add the __str__, __repr__, and __eq__ methods to the class. 76 | Your response must not comment on what you changed. 77 | Your response must not add any additional comments, clarifications, notes, information, explanations, details, examples or thoughts. 78 | Your response must be a markdown file. 79 | The first section header must be the filename `{meta.filename}.py`. 80 | The content of the first section must be a python code block with the generated code. 81 | The second section header must be the filename `requirements.txt`. 
async def gpt_func_to_python(meta: MarshaMeta, n_results: int, retries: int = 3, debug: bool = False):
    """Run the first stage: generate code + test markdown docs from the marsha doc.

    Fires two LLM calls in parallel — one producing `<filename>.py` plus
    `requirements.txt`, one producing `<filename>_test.py` — each asked for
    `n_results` candidates. The i-th code result is paired with the i-th test
    result, validated as markdown, and the surviving pairs are returned as a
    list of combined markdown strings. On zero valid pairs the whole function
    recurses with one fewer retry, and raises once retries are exhausted.
    """
    marsha_for_code_llm = format_marsha_for_llm(meta)
    gpt_gen_code = ChatGPTMapper(f'''You are a senior software engineer assigned to write Python 3 functions.
The assignment is written in markdown format.
The description of each function should be included as a docstring.
Add type hints if feasible.
The filename should exactly match the name `{meta.filename}.py`.
Make sure to follow PEP8 guidelines.
Make sure to include all needed standard Python libraries imports.
Generate `requirements.txt` file with all needed dependencies, do not add fixed version to dependencies.
If need to convert `type` to Python classes, you will receive a markdown where the heading is the class name followed by several rows following a comma separated CSV format where the first row contains all class properties and the following rows contain examples of the values of those properties. Make sure to add the __str__, __repr__, and __eq__ methods to the class.
Your response must not comment on what you changed.
Your response must not add any additional comments, clarifications, notes, information, explanations, details, examples or thoughts.
Your response must be a markdown file.
The first section header must be the filename `{meta.filename}.py`.
The content of the first section must be a python code block with the generated code.
The second section header must be the filename `requirements.txt`.
The content of the second section must be a text code block with the generated code.
The file should end with the code block, nothing else should be added to the file.
The desired response must look like the following:

# {meta.filename}.py

```py

```

# requirements.txt

```txt

```

''', n_results=n_results, stats_stage='first_stage')
    marsha_for_test_llm = format_marsha_for_llm(meta)
    gpt_gen_test = ChatGPTMapper(f'''You are a senior software engineer assigned to write a unit test suite for Python 3 functions.
The assignment is written in markdown format.
The unit tests created should exactly match the example cases provided for each function.
You have to create a TestCase per function provided.
The filename should exactly match the name `{meta.filename}_test.py`.
Unknown imports might come from the file where the function is defined, or from the standard library.
If you are working with files, make sure to mock the file system since the tests will be run in a sandboxed environment.
Make sure to follow PEP8 guidelines.
Make sure to include all needed standard Python libraries imports.
Your response must not comment on what you changed.
Your response must not add any additional comments, clarifications, notes, information, explanations, details, examples or thoughts.
Your response must be a markdown file.
The first section header must be the filename `{meta.filename}_test.py`.
The content of the first section must be a python code block with the generated code.
The file should end with the code block, nothing else should be added to the file.
The desired response must look like the following:

# {meta.filename}_test.py

```py

```

''', n_results=n_results, stats_stage='first_stage')
    if debug:
        print(f'''marsha_for_llm =
---- start ----
{marsha_for_code_llm}
---- end ----''')

    # Both LLM calls run concurrently; reses[0] holds the code candidates,
    # reses[1] the test candidates.
    reses = await asyncio.gather(gpt_gen_code.run(marsha_for_code_llm), gpt_gen_test.run(marsha_for_test_llm))
    # The output should be a valid list of Markdown documents. Parse each one and return the list of parsed doc, on failure
    # do not add it to the list. If the list to return is empty try again (or fully error out, for now)
    try:
        mds = list()
        for i in range(n_results):
            # TODO: This unfairly reduces the success probability of the separate GPT calls, requiring both in the same run
            # to pass. It should instead try to use the same pass if possible, but otherwise use a different pairing so bad
            # dice rolls don't compound each other.
            doc = reses[0][i] + '\n\n' + reses[1][i]
            # Some validation that the generated file matches the expected format of:
            # # function_name.py
            # ```py
            #
            # ```
            # # requirements.txt
            # ```text
            #
            # ```
            # # function_name_test.py
            # ```py
            #
            # ```
            if validate_first_stage_markdown(doc, meta.filename):
                mds.append(doc)
            else:
                if debug:
                    print(f'''[First stage] Invalid doc:
{doc}''')
        if len(mds) == 0:
            raise Exception('Invalid output format')
        return mds
    except Exception:
        if debug:
            # NOTE(review): this says "Retrying..." even on the final attempt
            # (retries == 0) right before the hard failure below — confirm
            # whether the message should be conditional on retries > 0.
            print(
                f'Failed to parse doc. Retries left = {retries}. Retrying...')
        if retries > 0:
            return await gpt_func_to_python(meta, n_results, retries - 1, debug)
        else:
            raise Exception('Failed to generate code', meta.filename)
async def fix_file(marsha_filename: str, filename: str, lint_text: str, retries: int = 3, debug: bool = False):
    """Ask the LLM to fix the lint errors reported for one generated file.

    Sends the file contents plus the pylama report to the second-stage
    mapper, validates that the response is a markdown doc headed by the
    same filename, and writes the fixed file back to disk. On an invalid
    response the call recurses with one fewer retry and raises when
    retries run out.

    Args:
        marsha_filename: base name of the marsha document (kept for parity
            with the other stages; not used in the prompt itself).
        filename: path of the generated Python file to fix.
        lint_text: formatted pylama findings for that file.
        retries: remaining attempts before giving up.
        debug: when True, dump invalid LLM responses to stdout.
    """
    code = read_file(filename)
    # BUGFIX: the prompt previously hard-coded the literal text `(unknown)`
    # where the target filename must be interpolated, so the model was told to
    # emit a `# (unknown)` section header that validate_second_stage_markdown
    # (which checks against `filename`) would always reject. Interpolate
    # {filename} instead, here and in the payload below.
    gpt_fix = ChatGPTMapper(f'''You are a senior software engineer working with Python 3.
You are using the `pylama` linting tool to find obvious errors and then fixing them. The linting tool uses `pyflakes` and `pycodestyle` under the hood to provide the recommendations.
All of the lint errors require fixing.
You should only fix the lint errors and not change anything else.
Your response must not comment on what you changed.
Your response must not add any additional comments, clarifications, notes, information, explanations, details, examples or thoughts.
Your response must be a markdown file.
The first section header must be the filename `{filename}`.
The content of the first section must be a python code block with the generated code.
The file should end with the code block, nothing else should be added to the file.
The desired response must look like the following:

# {filename}

```py

```

''', stats_stage='second_stage')
    fixed_code = await gpt_fix.run(f'''# {filename}

```py
{code}
```

# pylama results

```
{lint_text}
```''')
    # The output should be a valid Markdown document. Parse it and return the parsed doc, on failure
    # try again (or fully error out, for now)
    try:
        if not validate_second_stage_markdown(fixed_code, filename):
            if debug:
                print(f'''[Second stage] Invalid doc:
{fixed_code}''')
            raise Exception('Invalid output format')
        write_files_from_markdown(fixed_code)
    except Exception:
        if retries > 0:
            return await fix_file(marsha_filename, filename, lint_text, retries - 1, debug)
        else:
            raise Exception('Failed to generate code', lint_text)
async def lint_and_fix_files(marsha_filename: str, files: list[str], max_depth: int = 4, debug: bool = False):
    """Lint all generated files with pylama and LLM-fix any findings.

    Runs pycodestyle + pyflakes over `files`, dispatches one concurrent
    `fix_file` job per file with findings, then recurses (up to `max_depth`
    passes) until a pass reports zero lints. Raises when the depth budget
    is exhausted. Purely stylistic checks are ignored (see the ignore set
    below) since autoformatting happens later anyway.
    """
    if max_depth == 0:
        raise Exception('Failed to fix code', files)
    options = parse_options()

    # Disabling pydocstyle and mccabe as they only do style checks, no compile-like checks
    options.linters = ['pycodestyle', 'pyflakes']
    options.paths = [os.path.abspath(f'./{file}') for file in files]

    # options.select = {
    #     'E112', # expected an indented block
    #     'E113', # unexpected indentation
    #     'E901', # SyntaxError or IndentationError
    #     'E902', # IOError
    #     'E0602', # undefined variable
    #     'E1122', # unexpected keyword argument in function call
    #     'W0401', # wildcard import; unable to detect undefined names
    # }

    # We're using the linter as a way to catch coarse errors like missing imports. We don't actually
    # want the LLM to fix the linting issues, we'll just run the output through Python Black at the
    # end, so we have a significant number of warnings and "errors" from the linter we ignore
    options.ignore = {
        'E111',  # indentation is not multiple of 4
        'E117',  # over-indented
        'E126',  # continuation line over-indented for hanging indent
        'E127',  # continuation line over-indented for visual indent
        'E128',  # continuation line under-indented for visual indent
        'E129',  # visually indented line with same indent as next logical line
        'E131',  # continuation line unaligned for hanging indent
        'E133',  # closing bracket is missing indentation
        'E201',  # whitespace after `(`
        'E202',  # whitespace before `)`
        'E203',  # whitespace before `,` `;` `:`
        'E211',  # whitespace before `(`'
        'E221',  # multiple spaces before operator
        'E222',  # multiple spaces after operator
        'E223',  # tab before operator
        'E224',  # tab after operator
        'E225',  # missing whitespace around operator
        'E226',  # missing whitespace around arithmetic operator
        'E227',  # missing whitespace around bitwise or shift operator
        'E228',  # missing whitespace around modulo operator
        'E231',  # missing whitespace after `,` `;` `:`
        'E241',  # multiple spaces after `,` `;` `:`
        'E242',  # tab after `,` `;` `:`
        'E251',  # unexpected spaces around keyword / parameter equals
        'E252',  # missing whitespace around parameter equals
        'E261',  # at least two spaces before inline comment
        'E262',  # inline comment should start with `# `
        'E265',  # block comment should start with `# `
        'E266',  # too many `#` for block comment
        'E271',  # multiple spaces after keyword
        'E272',  # multiple spaces before keyword
        'E273',  # tab before keyword
        'E274',  # tab after keyword
        'E275',  # space missing after keyword
        'E301',  # expected 1 blank line, found 0
        'E302',  # expected 2 blank lines, found 0
        'E303',  # too many blank lines
        'E304',  # blank line after function decorator
        'E305',  # expected 2 blank lines after function or class
        'E306',  # expected 1 blank line before nested definition
        'E401',  # multiple imports on one line
        'E501',  # line too long
        'E502',  # blackslash redundant between brackets
        'E701',  # multiple statements on one line (colon)
        'E702',  # multiple statements on one line (semicolon)
        'E703',  # statement ends with a semicolon
        'E722',  # do not use bare except, specify exception instead
        'E731',  # do not assign a lambda expression, use a def
        'W191',  # indentation contains tabs
        'W291',  # trailing whitespace
        'W292',  # no newline at end of file
        'W293',  # blank line contains whitespace
        'W391',  # blank line at end of file
        # https://github.com/AtomLinter/linter-pylama/blob/master/bin/pylama/lint/pylama_pyflakes.py
        'W0404',  # module is reimported multiple times
        'W0410',  # future import(s) after other imports
        'W0611',  # unused import
        'W0612',  # unused variable
    }

    lints = check_paths(
        [os.path.abspath(f'./{file}') for file in files], options=options, rootdir='.')

    if len(lints) == 0:
        return

    # One fix_file job per file that has findings; all jobs run concurrently.
    # NOTE(review): lints are gathered from absolute paths but matched here
    # against the (possibly relative) entries of `files` — verify
    # `e.filename` actually compares equal to these paths.
    jobs = []
    for file in files:
        file_lints = [e.format(DEFAULT_FORMAT)
                      for e in lints if e.filename == file]
        if len(file_lints) > 0:
            lint_text = '\n'.join(file_lints)
            jobs.append(fix_file(marsha_filename, file,
                        lint_text, debug=debug))
    await asyncio.gather(*jobs)

    # Re-lint after the fixes; recursion terminates via the max_depth guard.
    await lint_and_fix_files(marsha_filename, files, max_depth - 1, debug)
async def run_subprocess(stream: Process, timeout: float = 60.0) -> tuple[str, str]:
    """Await a subprocess's output with a timeout, killing it on expiry.

    Args:
        stream: an already-started asyncio subprocess.
        timeout: seconds to wait for the process to finish.

    Returns:
        (stdout, stderr) decoded as UTF-8.

    Raises:
        Exception: if the process does not finish within `timeout`
            (the process is killed first).
    """
    # Fixes two smells in the original: `stdout = ''` / `stderr = ''` str
    # defaults that could never reach the bytes-only `.decode()` below, and a
    # no-op `except Exception as e: raise e` clause that merely re-raised.
    try:
        stdout, stderr = await asyncio.wait_for(stream.communicate(), timeout)
    except asyncio.exceptions.TimeoutError:
        try:
            stream.kill()
        except OSError:
            # Ignore 'no such process' error
            pass
        raise Exception('run_subprocess timeout...')
    return (stdout.decode('utf-8'), stderr.decode('utf-8'))
async def test_and_fix_files(meta: MarshaMeta, files: list[str], retries: int = 4, debug: bool = False):
    """Run the generated test suite and LLM-fix code/tests until it passes.

    Sets up a per-output-directory venv when a requirements.txt is present,
    runs `<filename>_test.py` under that venv's Python, and — when the run
    prints FAILED or a Traceback — sends docs + code + tests + results to a
    GPT-4 mapper for repair, rewrites the files, and recurses with one
    fewer retry. Returns normally once the suite passes; raises after the
    retry budget is spent.
    """
    break_line = '\n'
    if retries == 0:
        raise Exception('Failed to fix code', meta.filename)
    # There should only be two files, the test file and the code file
    test_file = [file for file in files if file.endswith(
        f'{meta.filename}_test.py')][0]
    code_file = [file for file in files if file.endswith(
        f'{meta.filename}.py')][0]
    req_files = [file for file in files if file.endswith('requirements.txt')]
    # Define virtual environment path
    code_file_abspath = os.path.abspath(code_file)
    code_file_dir = os.path.dirname(code_file_abspath)
    venv_path = f'{code_file_dir}/venv'
    # Install requirements if needed
    req_file = None
    if len(req_files) > 0:
        req_file = req_files[0]
        if not os.path.exists(venv_path):
            print('Creating virtual environment...')
            try:
                create_venv_stream = await asyncio.create_subprocess_exec(
                    python, '-m', 'venv', venv_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                await run_subprocess(create_venv_stream)
            except Exception as e:
                # Best-effort: a failed venv creation falls back to the host
                # Python below rather than aborting the stage.
                if debug:
                    print('Failed to create virtual environment', e)
        print('Installing requirements...')
        try:
            # define pip executable based on os
            pip_exe = f'{venv_path}/Scripts/pip.exe' if platform.system(
            ) == 'Windows' else f'{venv_path}/bin/pip'
            pip_stream = await asyncio.create_subprocess_exec(
                pip_exe, 'install', '--disable-pip-version-check', '--no-compile', '-r', req_file, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # pip gets a longer (120s) budget than the default 60s.
            await run_subprocess(pip_stream, 120)
        except Exception as e:
            if debug:
                print('Failed to install requirements', e)

    # Run the test suite
    if not os.path.exists(venv_path):
        python_exe = python
    else:
        # define python executable based on os
        python_exe = f'{venv_path}/Scripts/python.exe' if platform.system(
        ) == 'Windows' else f'{venv_path}/bin/python'
    try:
        test_stream = await asyncio.create_subprocess_exec(
            python_exe, test_file, '-f', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = await run_subprocess(test_stream)
        # Failure is detected by scanning the combined output below, so both
        # streams are concatenated into one string.
        test_results = f'''{stdout}{stderr}'''
    except Exception as e:
        print('Failed to run test suite...', e)
        test_results = None

    # Recursively work on fixing the files while the test suite fails, return when complete
    if test_results is not None and ("FAILED" in test_results or "Traceback" in test_results):
        if debug:
            print('Test failed, trying to fix code')
            print(test_results)
        test = read_file(test_file)
        code = read_file(code_file)
        requirements = read_file(req_file) if req_file is not None else None
        void_function_names = list(
            map(lambda f: extract_func_name(f), meta.void_funcs))
        gpt_fix = ChatGPTMapper(f'''You are a senior software engineer helping a junior engineer fix some code that is failing.
You are given the documentation of the functions they were assigned to write, followed by the functions they wrote, the unit tests they wrote, and the unit test results.
Focus on just fixing the mistakes in the code and unit tests as necessary, trying to do the less number of changes.
Do not write new unit tests, just fix the existing ones.
{f"Do not make any reference to the functions {', '.join(void_function_names)} in `{meta.filename}_test.py`." if len(void_function_names) > 0 else ""}
Make sure to produce working code that passes the unit tests.
Make sure to follow PEP8 style guidelines.
Make sure to include all needed standard Python libraries imports.
Generate `requirements.txt` file with all needed dependencies, do not add fixed version to dependencies.
Your response must not comment on what you changed.
Your response must not add any additional comments, clarifications, notes, information, explanations, details, examples or thoughts.
Your response must be a markdown file.
The first section header must be the filename `{meta.filename}.py`.
The content of the first section must be a python code block with the generated code.
The second section header must be the filename `requirements.txt`.
The content of the second section must be a text code block with the generated code.
The third section header must be the filename `{meta.filename}_test.py`.
The content of the third section must be a python code block with the generated code.
The file should end with the code block, nothing else should be added to the file.
The desired response must look like the following:

# {meta.filename}.py

```py

```

# requirements.txt

```txt

```

# {meta.filename}_test.py

```py

```

''', model='gpt-4', stats_stage='third_stage')
        fixed_code = await gpt_fix.run(f'''{format_marsha_for_llm(meta)}

{f"""## Do not test the following functions:

{break_line.join(map(lambda f: f"- {f}", void_function_names))}""" if len(void_function_names) > 0 else ""}

# {code_file}

```py
{code}
```

# requirements.txt

```txt
{requirements if requirements is not None else ''}
```

# {test_file}

```py
{test}
```

# Test Results

{test_results}''')
        # The output should be a valid Markdown document. Parse it and return the parsed doc, on failure
        # try again (or fully error out, for now)
        try:
            # Some validation that the generated file matches the expected format of:
            # # function_name.py
            # ```py
            #
            # ```
            # # requirements.txt
            # ```txt
            #
            # ```
            # # function_name_test.py
            # ```py
            #
            # ```
            if not validate_first_stage_markdown(fixed_code, meta.filename):
                raise Exception('Invalid output format')
            subdir = '/'.join(code_file.split('/')[:-1])
            files = write_files_from_markdown(fixed_code, subdir=subdir)
        except Exception:
            # NOTE(review): when retries > 0 this swallows the validation
            # failure and recurses with the *old* files — confirm that is the
            # intended "retry with previous attempt" behavior.
            if retries == 0:
                raise Exception('Failed to fix code', meta.filename)

        # We figure out if this pass has succeeded by re-running the tests recursively, where it
        # ejects from the iteration if the tests pass
        return await test_and_fix_files(meta, files, retries - 1, debug)
    elif test_results is None:  # If the test suite failed to run, we try again
        return await test_and_fix_files(meta, files, retries - 1, debug)
async def generate_python_code(args, meta: MarshaMeta, n_results: int, debug: bool) -> list[str]:
    """Drive the first stage: sanity-check the marsha doc, then generate code.

    Unless `args.exclude_sanity_check` is set, first asks the LLM whether the
    document is implementable; on rejection, prints improvement feedback and
    exits the process with status 1. Otherwise generates the candidate
    markdown docs. Stage wall-time is always recorded in `stats`, and any
    failure is logged before being re-raised to the caller.
    """
    started_at = time.time()
    print('Generating Python code...')
    mds = None
    try:
        # Sanity check first (unless disabled): bail out of the process
        # entirely when the doc is judged unimplementable.
        if not args.exclude_sanity_check and not await gpt_can_func_python(meta, n_results):
            await gpt_improve_func(meta)
            sys.exit(1)
        mds = await gpt_func_to_python(meta, n_results, debug=debug)
    except Exception as e:
        print('First stage failure')
        print(e)
        if debug:
            traceback.print_tb(e.__traceback__)
        print('Retrying...')
        raise e
    finally:
        # Timing is recorded whether the stage succeeded or not.
        finished_at = time.time()
        stats.first_stage.total_time = prettify_time_delta(
            finished_at - started_at)
    return mds
async def review_and_fix(args, meta: MarshaMeta, files: list[str], debug: bool = False):
    """Run stages two and three over the generated files, then autoformat.

    Stage two lints and LLM-fixes the files; stage three runs the generated
    test suite and LLM-fixes until it passes. Each stage's wall-time is
    recorded in `stats` even on failure, and failures are logged then
    re-raised. With `args.debug` set, file contents are dumped after each
    stage and after formatting.
    """
    # Improvement: the args.debug file-dump loop was copy-pasted three times
    # verbatim; it is now the _dump_files helper below.
    t_ssi = time.time()
    print('Parsing generated code...')
    try:
        await lint_and_fix_files(meta.filename, files, debug=debug)
    except Exception as e:
        print('Second stage failure')
        print(e)
        raise e
    finally:
        t_ssii = time.time()
        stats.second_stage.total_time = prettify_time_delta(
            t_ssii - t_ssi)
    if args.debug:
        _dump_files(files)
    t_tsi = time.time()
    print('Verifying and correcting generated code...')
    try:
        await test_and_fix_files(meta, files, debug=debug)
    except Exception as e:
        print('Third stage failure')
        print(e)
        raise e
    finally:
        t_tsii = time.time()
        stats.third_stage.total_time = prettify_time_delta(
            t_tsii - t_tsi)
    if args.debug:
        _dump_files(files)
    print('Formatting code...')
    autoformat_files(files)
    if args.debug:
        _dump_files(files)


def _dump_files(files: list[str]) -> None:
    """Print each file's path followed by its contents (debug aid)."""
    for file in files:
        print(f'# {file}\n{read_file(file)}\n')