├── README.md
├── demos
│   └── patchscopes_app.py
├── requirements.txt
├── scripts
│   ├── run_expansion_estimation.sh
│   └── run_vocab_expansion.sh
├── src
│   └── tokens2words
│       ├── __init__.py
│       ├── analysis
│       │   └── identified_in_patchscopes.py
│       ├── experiments
│       │   ├── __init__.py
│       │   └── detokenization
│       │       ├── __init__.py
│       │       ├── tokens_aggregation.py
│       │       ├── utils.py
│       │       ├── word_retrieval_multi_tokens.py
│       │       ├── word_retrieval_seperations.py
│       │       └── word_retrieval_typos.py
│       ├── processor.py
│       ├── representation_translator.py
│       ├── run_new_vocab_success_estimate.py
│       ├── run_patchscopes.py
│       ├── run_vocab_expansion_eval.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── calibration_utils.py
│       │   ├── core_vocab_utils.py
│       │   ├── data_utils.py
│       │   ├── downstream_utils.py
│       │   ├── enums.py
│       │   ├── eval_utils.py
│       │   ├── file_utils.py
│       │   ├── hebrew_utils.py
│       │   ├── logit_lens.py
│       │   ├── logits_utils.py
│       │   ├── model_utils.py
│       │   └── procrustes
│       │       ├── __init__.py
│       │       ├── orthogonal.py
│       │       └── utils.py
│       ├── vocab_modifier.py
│       └── word_retriever.py
└── word_lists
    ├── top_5k_arabic_words.txt
    └── top_5k_hebrew_words_without_nikud.txt

/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/README.md
--------------------------------------------------------------------------------
/demos/patchscopes_app.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/demos/patchscopes_app.py
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/requirements.txt
--------------------------------------------------------------------------------
/scripts/run_expansion_estimation.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/scripts/run_expansion_estimation.sh
--------------------------------------------------------------------------------
/scripts/run_vocab_expansion.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/scripts/run_vocab_expansion.sh
--------------------------------------------------------------------------------
/src/tokens2words/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/tokens2words/analysis/identified_in_patchscopes.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/analysis/identified_in_patchscopes.py
--------------------------------------------------------------------------------
/src/tokens2words/experiments/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/tokens2words/experiments/detokenization/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/tokens2words/experiments/detokenization/tokens_aggregation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/experiments/detokenization/tokens_aggregation.py
--------------------------------------------------------------------------------
/src/tokens2words/experiments/detokenization/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/experiments/detokenization/utils.py
--------------------------------------------------------------------------------
/src/tokens2words/experiments/detokenization/word_retrieval_multi_tokens.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/experiments/detokenization/word_retrieval_multi_tokens.py
--------------------------------------------------------------------------------
/src/tokens2words/experiments/detokenization/word_retrieval_seperations.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/experiments/detokenization/word_retrieval_seperations.py
--------------------------------------------------------------------------------
/src/tokens2words/experiments/detokenization/word_retrieval_typos.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/experiments/detokenization/word_retrieval_typos.py
--------------------------------------------------------------------------------
/src/tokens2words/processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/processor.py
--------------------------------------------------------------------------------
/src/tokens2words/representation_translator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/representation_translator.py
--------------------------------------------------------------------------------
/src/tokens2words/run_new_vocab_success_estimate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/run_new_vocab_success_estimate.py
--------------------------------------------------------------------------------
/src/tokens2words/run_patchscopes.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/run_patchscopes.py
--------------------------------------------------------------------------------
/src/tokens2words/run_vocab_expansion_eval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/run_vocab_expansion_eval.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/tokens2words/utils/calibration_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/calibration_utils.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/core_vocab_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/core_vocab_utils.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/data_utils.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/downstream_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/downstream_utils.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/enums.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/enums.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/eval_utils.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/file_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/file_utils.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/hebrew_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/hebrew_utils.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/logit_lens.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/logit_lens.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/logits_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/logits_utils.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/model_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/model_utils.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/procrustes/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/tokens2words/utils/procrustes/orthogonal.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/procrustes/orthogonal.py
--------------------------------------------------------------------------------
/src/tokens2words/utils/procrustes/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/utils/procrustes/utils.py
--------------------------------------------------------------------------------
/src/tokens2words/vocab_modifier.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/vocab_modifier.py
--------------------------------------------------------------------------------
/src/tokens2words/word_retriever.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/src/tokens2words/word_retriever.py
--------------------------------------------------------------------------------
/word_lists/top_5k_arabic_words.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/word_lists/top_5k_arabic_words.txt
--------------------------------------------------------------------------------
/word_lists/top_5k_hebrew_words_without_nikud.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwartz-lab-NLP/Tokens2Words/HEAD/word_lists/top_5k_hebrew_words_without_nikud.txt
--------------------------------------------------------------------------------
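A getting-started sketch, inferred only from the layout above: requirements.txt and the two shell scripts under scripts/ suggest the usual install-then-run flow. The listing does not show what arguments or environment each script expects, so check the scripts' contents (via the raw URLs above) before running them.

    # Sketch, assuming the scripts are run from the repository root
    # and that requirements.txt covers all Python dependencies.
    pip install -r requirements.txt

    # Experiment entry points; any required arguments or environment
    # variables are defined inside the scripts themselves.
    bash scripts/run_expansion_estimation.sh
    bash scripts/run_vocab_expansion.sh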