├── .editorconfig ├── .flake8 ├── .gitignore ├── .gitmodules ├── .python-version ├── .vscode └── settings.json ├── CITATION ├── LICENSE ├── README.md ├── docs ├── index.html ├── index.js └── llama-tokenizer.js ├── llama2d.png ├── pyproject.toml ├── requirements.in ├── requirements.txt ├── screenshot.png ├── script.sh ├── src ├── __init__.py ├── data │ ├── .gitignore │ ├── mind2web_example.json │ ├── mind2web_example.mhtml │ ├── pretraining-cache │ │ └── .gitignore │ ├── pretraining_urls.py │ └── urls.txt ├── llama2d │ ├── __init__.py │ ├── constants.py │ ├── datasets │ │ ├── __init__.py │ │ ├── cached.py │ │ ├── huggingface.py │ │ ├── mhtml_to_hhtml.py │ │ ├── mind2web.py │ │ ├── mind2web_convert.py │ │ ├── pretraining.py │ │ └── synthetic │ │ │ ├── top_or_bottom.py │ │ │ ├── unscramble_words.py │ │ │ └── zoo_compass.py │ ├── find_pos_given_attr │ │ ├── download_mind2web.py │ │ └── find_pos_given_attr.py │ ├── modal │ │ ├── __init__.py │ │ ├── common.py │ │ ├── datasets │ │ │ ├── cached_dataset.py │ │ │ ├── hf_dataset.py │ │ │ ├── modal_docs.jsonl │ │ │ ├── new_dataset.py │ │ │ ├── sql_dataset.py │ │ │ └── zoo_dataset.py │ │ ├── finetuning.py │ │ ├── flat_param.py │ │ ├── inference.py │ │ ├── repro.py │ │ ├── requirements.txt │ │ ├── train.py │ │ ├── urls.txt │ │ └── validate_dataset.py │ ├── tagging │ │ ├── add_tags_to_page.py │ │ └── tagUtils.js │ └── vision │ │ ├── __init__.py │ │ ├── learn_mlp_on_embeds.py │ │ ├── ocr.py │ │ ├── render_dataset.py │ │ ├── take_screenshot.py │ │ ├── url_to_llama_input.py │ │ ├── viz_pt_input.py │ │ └── webutils │ │ ├── LICENSE.chromedriver │ │ ├── chromedriver │ │ ├── playwright_browser.py │ │ ├── selenium_action_chain.py │ │ ├── stacked_image.png │ │ ├── stitch_webpage.py │ │ └── web_to_action.py ├── mhtml │ ├── demos │ │ ├── finance.mhtml │ │ ├── local.mhtml │ │ ├── megabus.mhtml │ │ ├── megabus2.mhtml │ │ └── megabus3.mhtml │ ├── download.js │ ├── finance.json │ ├── index.js │ ├── package-lock.json │ ├── package.json │ ├── serve.js │ └── serve_local_data.js ├── models │ └── .gitignore └── secrets │ └── .gitignore └── tests └── testing.py /.editorconfig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/.editorconfig -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/.flake8 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/.gitmodules -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11.4 -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /CITATION: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/CITATION -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/README.md -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/docs/index.html -------------------------------------------------------------------------------- /docs/index.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/docs/index.js -------------------------------------------------------------------------------- /docs/llama-tokenizer.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/docs/llama-tokenizer.js -------------------------------------------------------------------------------- /llama2d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/llama2d.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/requirements.in -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/requirements.txt -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/screenshot.png -------------------------------------------------------------------------------- /script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/script.sh -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/__init__.py -------------------------------------------------------------------------------- /src/data/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/data/.gitignore -------------------------------------------------------------------------------- /src/data/mind2web_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/data/mind2web_example.json -------------------------------------------------------------------------------- /src/data/mind2web_example.mhtml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/data/mind2web_example.mhtml -------------------------------------------------------------------------------- /src/data/pretraining-cache/.gitignore: -------------------------------------------------------------------------------- 1 | **/* 2 | !.gitignore -------------------------------------------------------------------------------- /src/data/pretraining_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/data/pretraining_urls.py -------------------------------------------------------------------------------- /src/data/urls.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/data/urls.txt -------------------------------------------------------------------------------- /src/llama2d/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llama2d/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/constants.py -------------------------------------------------------------------------------- /src/llama2d/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llama2d/datasets/cached.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/datasets/cached.py -------------------------------------------------------------------------------- /src/llama2d/datasets/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/datasets/huggingface.py -------------------------------------------------------------------------------- /src/llama2d/datasets/mhtml_to_hhtml.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/datasets/mhtml_to_hhtml.py -------------------------------------------------------------------------------- /src/llama2d/datasets/mind2web.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/datasets/mind2web.py -------------------------------------------------------------------------------- /src/llama2d/datasets/mind2web_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/datasets/mind2web_convert.py -------------------------------------------------------------------------------- /src/llama2d/datasets/pretraining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/datasets/pretraining.py -------------------------------------------------------------------------------- /src/llama2d/datasets/synthetic/top_or_bottom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/datasets/synthetic/top_or_bottom.py -------------------------------------------------------------------------------- /src/llama2d/datasets/synthetic/unscramble_words.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/datasets/synthetic/unscramble_words.py -------------------------------------------------------------------------------- /src/llama2d/datasets/synthetic/zoo_compass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/datasets/synthetic/zoo_compass.py -------------------------------------------------------------------------------- /src/llama2d/find_pos_given_attr/download_mind2web.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/find_pos_given_attr/download_mind2web.py -------------------------------------------------------------------------------- /src/llama2d/find_pos_given_attr/find_pos_given_attr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/find_pos_given_attr/find_pos_given_attr.py -------------------------------------------------------------------------------- /src/llama2d/modal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llama2d/modal/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/common.py -------------------------------------------------------------------------------- /src/llama2d/modal/datasets/cached_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/datasets/cached_dataset.py -------------------------------------------------------------------------------- /src/llama2d/modal/datasets/hf_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/datasets/hf_dataset.py -------------------------------------------------------------------------------- /src/llama2d/modal/datasets/modal_docs.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/datasets/modal_docs.jsonl -------------------------------------------------------------------------------- /src/llama2d/modal/datasets/new_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/datasets/new_dataset.py -------------------------------------------------------------------------------- /src/llama2d/modal/datasets/sql_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/datasets/sql_dataset.py -------------------------------------------------------------------------------- /src/llama2d/modal/datasets/zoo_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/datasets/zoo_dataset.py -------------------------------------------------------------------------------- /src/llama2d/modal/finetuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/finetuning.py -------------------------------------------------------------------------------- /src/llama2d/modal/flat_param.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/flat_param.py -------------------------------------------------------------------------------- /src/llama2d/modal/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/inference.py -------------------------------------------------------------------------------- /src/llama2d/modal/repro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/repro.py -------------------------------------------------------------------------------- /src/llama2d/modal/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/requirements.txt -------------------------------------------------------------------------------- /src/llama2d/modal/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/train.py -------------------------------------------------------------------------------- /src/llama2d/modal/urls.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/urls.txt -------------------------------------------------------------------------------- /src/llama2d/modal/validate_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/modal/validate_dataset.py -------------------------------------------------------------------------------- /src/llama2d/tagging/add_tags_to_page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/tagging/add_tags_to_page.py -------------------------------------------------------------------------------- /src/llama2d/tagging/tagUtils.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/tagging/tagUtils.js -------------------------------------------------------------------------------- /src/llama2d/vision/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/__init__.py -------------------------------------------------------------------------------- /src/llama2d/vision/learn_mlp_on_embeds.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/learn_mlp_on_embeds.py -------------------------------------------------------------------------------- /src/llama2d/vision/ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/ocr.py -------------------------------------------------------------------------------- /src/llama2d/vision/render_dataset.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llama2d/vision/take_screenshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/take_screenshot.py -------------------------------------------------------------------------------- /src/llama2d/vision/url_to_llama_input.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/url_to_llama_input.py -------------------------------------------------------------------------------- /src/llama2d/vision/viz_pt_input.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/viz_pt_input.py -------------------------------------------------------------------------------- /src/llama2d/vision/webutils/LICENSE.chromedriver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/webutils/LICENSE.chromedriver -------------------------------------------------------------------------------- /src/llama2d/vision/webutils/chromedriver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/webutils/chromedriver -------------------------------------------------------------------------------- /src/llama2d/vision/webutils/playwright_browser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/webutils/playwright_browser.py -------------------------------------------------------------------------------- /src/llama2d/vision/webutils/selenium_action_chain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/webutils/selenium_action_chain.py -------------------------------------------------------------------------------- /src/llama2d/vision/webutils/stacked_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/webutils/stacked_image.png -------------------------------------------------------------------------------- /src/llama2d/vision/webutils/stitch_webpage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/webutils/stitch_webpage.py -------------------------------------------------------------------------------- /src/llama2d/vision/webutils/web_to_action.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/llama2d/vision/webutils/web_to_action.py -------------------------------------------------------------------------------- /src/mhtml/demos/finance.mhtml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/demos/finance.mhtml -------------------------------------------------------------------------------- /src/mhtml/demos/local.mhtml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/demos/local.mhtml -------------------------------------------------------------------------------- /src/mhtml/demos/megabus.mhtml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/demos/megabus.mhtml -------------------------------------------------------------------------------- /src/mhtml/demos/megabus2.mhtml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/demos/megabus2.mhtml -------------------------------------------------------------------------------- /src/mhtml/demos/megabus3.mhtml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/demos/megabus3.mhtml -------------------------------------------------------------------------------- /src/mhtml/download.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/download.js -------------------------------------------------------------------------------- /src/mhtml/finance.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/finance.json -------------------------------------------------------------------------------- /src/mhtml/index.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/index.js -------------------------------------------------------------------------------- /src/mhtml/package-lock.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/package-lock.json -------------------------------------------------------------------------------- /src/mhtml/package.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/package.json -------------------------------------------------------------------------------- /src/mhtml/serve.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/serve.js -------------------------------------------------------------------------------- /src/mhtml/serve_local_data.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/src/mhtml/serve_local_data.js -------------------------------------------------------------------------------- /src/models/.gitignore: -------------------------------------------------------------------------------- 1 | **/* 2 | 3 | !.gitignore 4 | -------------------------------------------------------------------------------- /src/secrets/.gitignore: -------------------------------------------------------------------------------- 1 | **/* 2 | !.gitignore -------------------------------------------------------------------------------- /tests/testing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Llama2D/llama2d/HEAD/tests/testing.py --------------------------------------------------------------------------------