├── .github └── FUNDING.yml ├── .gitattributes ├── com.doji.transformers ├── Editor │ ├── TokenizerModelmporter.cs.meta │ ├── TokenizerModelAssetEditor.cs.meta │ ├── TokenizerModelAssetEditor.cs │ ├── Doji.Transformers.Editor.asmdef.meta │ ├── Doji.Transformers.Editor.asmdef │ └── TokenizerModelmporter.cs ├── Runtime │ ├── Scripts │ │ ├── Unity │ │ │ ├── TokenizerModelAsset.cs.meta │ │ │ └── TokenizerModelAsset.cs │ │ ├── CacheUtils │ │ │ ├── OffloadedCache.cs │ │ │ ├── QuantizedCache.cs │ │ │ ├── HQQQuantizedCache.cs │ │ │ ├── QuantoQuantizedCache.cs │ │ │ ├── SlidingWindowCache.cs │ │ │ ├── Cache.cs.meta │ │ │ ├── DynamicCache.cs.meta │ │ │ ├── HybridCache.cs.meta │ │ │ ├── MambaCache.cs.meta │ │ │ ├── SinkCache.cs.meta │ │ │ ├── StaticCache.cs.meta │ │ │ ├── HQQQuantizedCache.cs.meta │ │ │ ├── OffloadedCache.cs.meta │ │ │ ├── QuantizedCache.cs.meta │ │ │ ├── EncoderDecoderCache.cs.meta │ │ │ ├── QuantizedCacheConfig.cs.meta │ │ │ ├── QuantoQuantizedCache.cs.meta │ │ │ ├── SlidingWindowCache.cs.meta │ │ │ ├── SinkCache.cs │ │ │ ├── MambaCache.cs │ │ │ ├── HybridCache.cs │ │ │ ├── StaticCache.cs │ │ │ ├── EncoderDecoderCache.cs │ │ │ ├── Cache.cs │ │ │ ├── QuantizedCacheConfig.cs │ │ │ └── DynamicCache.cs │ │ ├── TokenizationUtils │ │ │ ├── TokenizationUtils.cs │ │ │ ├── Token.cs.meta │ │ │ ├── Trie.cs.meta │ │ │ ├── TokenString.cs.meta │ │ │ ├── PreTrainedTokenizer.cs.meta │ │ │ ├── TokenString.cs │ │ │ ├── TokenizationUtils.cs.meta │ │ │ └── Token.cs │ │ ├── Models.meta │ │ ├── Unity.meta │ │ ├── Utils.meta │ │ ├── CacheUtils.meta │ │ ├── Generation.meta │ │ ├── Models │ │ │ ├── Auto.meta │ │ │ ├── Clip.meta │ │ │ ├── GPT2.meta │ │ │ ├── Phi3.meta │ │ │ ├── Llama.meta │ │ │ ├── Clip │ │ │ │ ├── Vocab.cs.meta │ │ │ │ ├── BasicTokenizer.cs.meta │ │ │ │ ├── ClipTokenizer.cs.meta │ │ │ │ ├── Vocab.cs │ │ │ │ └── BasicTokenizer.cs │ │ │ ├── Auto │ │ │ │ ├── AutoTokenizer.cs.meta │ │ │ │ ├── AutoModelForCausalLM.cs.meta │ │ │ │ ├── AutoModelForCausalLM.cs │ │ │ │ └── 
AutoTokenizer.cs │ │ │ ├── GPT2 │ │ │ │ └── GPT2Tokenizer.cs.meta │ │ │ ├── Llama │ │ │ │ └── LlamaTokenizer.cs.meta │ │ │ └── Phi3 │ │ │ │ ├── Phi3ForCausalLM.cs.meta │ │ │ │ └── Phi3ForCausalLM.cs │ │ ├── Generation │ │ │ ├── Utils.meta │ │ │ ├── BeamSearch.meta │ │ │ ├── LogitsProcess.meta │ │ │ ├── ConfigurationUtils.meta │ │ │ ├── StoppingCriteria.meta │ │ │ ├── Utils │ │ │ │ ├── GenerationMixin.cs.meta │ │ │ │ ├── GenerateDecoderOnlyOutput.cs.meta │ │ │ │ └── GenerateDecoderOnlyOutput.cs │ │ │ ├── BeamSearch │ │ │ │ ├── BeamConstraints.cs.meta │ │ │ │ ├── BeamSearchScorer.cs.meta │ │ │ │ ├── ConstrainedBeamSearchScorer.cs.meta │ │ │ │ ├── BeamSearchScorer.cs │ │ │ │ ├── BeamConstraints.cs │ │ │ │ └── ConstrainedBeamSearchScorer.cs │ │ │ ├── LogitsProcess │ │ │ │ ├── LogitsProcessor.cs.meta │ │ │ │ ├── LogitsProcessorList.cs.meta │ │ │ │ └── LogitsProcessorList.cs │ │ │ ├── StoppingCriteria │ │ │ │ ├── MaxTimeCriteria.cs.meta │ │ │ │ ├── EosTokenCriteria.cs.meta │ │ │ │ ├── MaxLengthCriteria.cs.meta │ │ │ │ ├── MaxNewTokensCriteria.cs.meta │ │ │ │ ├── StopStringCriteria.cs.meta │ │ │ │ ├── StoppingCriteria.cs.meta │ │ │ │ ├── StoppingCriteriaList.cs.meta │ │ │ │ ├── StoppingCriteria.cs │ │ │ │ ├── StopStringCriteria.cs │ │ │ │ ├── StoppingCriteriaList.cs │ │ │ │ ├── EosTokenCriteria.cs │ │ │ │ ├── MaxTimeCriteria.cs │ │ │ │ ├── MaxNewTokensCriteria.cs │ │ │ │ └── MaxLengthCriteria.cs │ │ │ └── ConfigurationUtils │ │ │ │ └── GenerationConfig.cs.meta │ │ ├── ConfigurationUtils.meta │ │ ├── TokenizationUtils.meta │ │ ├── TokenizationUtilsBase.meta │ │ ├── Utils │ │ │ ├── Debug.cs.meta │ │ │ ├── Kwargs.cs.meta │ │ │ ├── Log.cs.meta │ │ │ ├── ModelOutput.cs.meta │ │ │ ├── OrderedDictionary.cs.meta │ │ │ ├── Debug.cs │ │ │ ├── Log.cs │ │ │ ├── ModelOutput.cs │ │ │ ├── Kwargs.cs │ │ │ └── OrderedDictionary.cs │ │ ├── ConfigurationUtils │ │ │ ├── PathUtils.cs.meta │ │ │ ├── Configurable.cs.meta │ │ │ ├── PretrainedConfig.cs.meta │ │ │ ├── PretrainedModel.cs.meta │ │ │ 
├── PathUtils.cs │ │ │ ├── PretrainedConfig.cs │ │ │ ├── Configurable.cs │ │ │ └── PretrainedModel.cs │ │ └── TokenizationUtilsBase │ │ │ ├── Input.cs.meta │ │ │ ├── AddedToken.cs.meta │ │ │ ├── Encoding.cs.meta │ │ │ ├── BatchEncoding.cs.meta │ │ │ ├── EncodingParams.cs.meta │ │ │ ├── InputConverter.cs.meta │ │ │ ├── InputEncoding.cs.meta │ │ │ ├── TokenizerConfig.cs.meta │ │ │ ├── ISpecialTokensMixin.cs.meta │ │ │ ├── SpecialTokensMixin.cs.meta │ │ │ ├── PreTrainedTokenizerBase.cs.meta │ │ │ ├── InputEncoding.cs │ │ │ ├── AddedToken.cs │ │ │ ├── Encoding.cs │ │ │ ├── ISpecialTokensMixin.cs │ │ │ ├── BatchEncoding.cs │ │ │ ├── EncodingParams.cs │ │ │ ├── TokenizerConfig.cs │ │ │ ├── InputConverter.cs │ │ │ ├── Input.cs │ │ │ └── SpecialTokensMixin.cs │ ├── AssemblyInfo.cs │ ├── Scripts.meta │ ├── Doji.Transformers.asmdef.meta │ ├── AssemblyInfo.cs.meta │ └── Doji.Transformers.asmdef ├── readme.md ├── readme.md.meta ├── CHANGELOG.md.meta ├── Tests │ ├── Editor │ │ ├── Resources │ │ │ ├── Phi-3-mini-4k-instruct │ │ │ │ ├── tokenizer.model │ │ │ │ └── tokenizer.model.meta │ │ │ └── Phi-3-mini-4k-instruct.meta │ │ ├── Resources.meta │ │ ├── Doji.Transformers.Editor.Tests.asmdef.meta │ │ ├── TrieTest.cs.meta │ │ ├── BasicTokenizerTest.cs.meta │ │ ├── BytesToUnicodeTest.cs.meta │ │ ├── ClipTokenizerTest.cs.meta │ │ ├── LLamaTokenizerTest.cs.meta │ │ ├── TokenizerConfigTest.cs.meta │ │ ├── InputSerializationTest.cs.meta │ │ ├── BasicTokenizerTest.cs │ │ ├── Doji.Transformers.Editor.Tests.asmdef │ │ ├── TrieTest.cs │ │ ├── InputSerializationTest.cs │ │ ├── TokenizerConfigTest.cs │ │ ├── LLamaTokenizerTest.cs │ │ └── ClipTokenizerTest.cs │ └── Editor.meta ├── package.json.meta ├── Editor.meta ├── Runtime.meta ├── Tests.meta ├── Third Party Notices.md.meta ├── Samples~ │ ├── Doji.Transformers.Samples.asmdef.meta │ ├── 01-Phi3ForCausalLMSample.meta │ ├── 01-Phi3ForCausalLMSample │ │ ├── Phi3ForCausalLMSample.unity.meta │ │ ├── Phi3ForCausalLMSample.cs.meta │ │ └── 
Phi3ForCausalLMSample.cs │ └── Doji.Transformers.Samples.asmdef ├── package.json └── CHANGELOG.md ├── projects └── Transformers │ ├── .vsconfig │ ├── ProjectSettings │ ├── ProjectVersion.txt │ ├── ClusterInputManager.asset │ ├── PresetManager.asset │ ├── VersionControlSettings.asset │ ├── EditorBuildSettings.asset │ ├── TimeManager.asset │ ├── AudioManager.asset │ ├── VFXManager.asset │ ├── TagManager.asset │ ├── PackageManagerSettings.asset │ ├── UnityConnectSettings.asset │ ├── MemorySettings.asset │ ├── DynamicsManager.asset │ ├── EditorSettings.asset │ ├── NavMeshAreas.asset │ ├── Physics2DSettings.asset │ ├── GraphicsSettings.asset │ └── InputManager.asset │ ├── Packages │ └── manifest.json │ └── .gitignore ├── LICENSE └── README.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: julienkay 2 | patreon: julienkay 3 | ko_fi: julienkay 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.onnx filter=lfs diff=lfs merge=lfs -text 2 | *.pdf filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /com.doji.transformers/Editor/TokenizerModelmporter.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: d3b5c76bfa7311a4d8854a250441a576 -------------------------------------------------------------------------------- /com.doji.transformers/Editor/TokenizerModelAssetEditor.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 19d2c9dca8aa8964da1a2d257055e03d -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Unity/TokenizerModelAsset.cs.meta: 
-------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 17752eb76eabbec47a1d0c86c1931220 -------------------------------------------------------------------------------- /projects/Transformers/.vsconfig: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0", 3 | "components": [ 4 | "Microsoft.VisualStudio.Workload.ManagedGame" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/ProjectVersion.txt: -------------------------------------------------------------------------------- 1 | m_EditorVersion: 6000.1.0b11 2 | m_EditorVersionWithRevision: 6000.1.0b11 (c7b7080a3879) 3 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/AssemblyInfo.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/julienkay/com.doji.transformers/HEAD/com.doji.transformers/Runtime/AssemblyInfo.cs -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/OffloadedCache.cs: -------------------------------------------------------------------------------- 1 | namespace Doji.AI.Transformers { 2 | public class OffloadedCache : DynamicCache { } 3 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/QuantizedCache.cs: -------------------------------------------------------------------------------- 1 | namespace Doji.AI.Transformers { 2 | public class QuantizedCache : DynamicCache { } 3 | } -------------------------------------------------------------------------------- /com.doji.transformers/readme.md: -------------------------------------------------------------------------------- 1 | # com.doji.transformers 2 | 3 | This is a 
Unity package that allows users to use pretrained transformer models with Unity Sentis. -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/HQQQuantizedCache.cs: -------------------------------------------------------------------------------- 1 | namespace Doji.AI.Transformers { 2 | public class HQQQuantizedCache : QuantizedCache { } 3 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/QuantoQuantizedCache.cs: -------------------------------------------------------------------------------- 1 | namespace Doji.AI.Transformers { 2 | public class QuantoQuantizedCache : QuantizedCache { } 3 | } -------------------------------------------------------------------------------- /com.doji.transformers/Editor/TokenizerModelAssetEditor.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/julienkay/com.doji.transformers/HEAD/com.doji.transformers/Editor/TokenizerModelAssetEditor.cs -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/ClusterInputManager.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! 
tag:unity3d.com,2011: 3 | --- !u!236 &1 4 | ClusterInputManager: 5 | m_ObjectHideFlags: 0 6 | m_Inputs: [] 7 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtils/TokenizationUtils.cs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/julienkay/com.doji.transformers/HEAD/com.doji.transformers/Runtime/Scripts/TokenizationUtils/TokenizationUtils.cs -------------------------------------------------------------------------------- /com.doji.transformers/readme.md.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 917360d0f87ed8142b912c9a63435049 3 | TextScriptImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /com.doji.transformers/CHANGELOG.md.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 46e429a4e1de1504bbd2978ac5937246 3 | TextScriptImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/Resources/Phi-3-mini-4k-instruct/tokenizer.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/julienkay/com.doji.transformers/HEAD/com.doji.transformers/Tests/Editor/Resources/Phi-3-mini-4k-instruct/tokenizer.model -------------------------------------------------------------------------------- /com.doji.transformers/package.json.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 8740d8fa4b2ff32458e5d90e3612ce44 3 | PackageManifestImporter: 4 | externalObjects: {} 5 | 
userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/PresetManager.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! tag:unity3d.com,2011: 3 | --- !u!1386491679 &1 4 | PresetManager: 5 | m_ObjectHideFlags: 0 6 | serializedVersion: 2 7 | m_DefaultPresets: {} 8 | -------------------------------------------------------------------------------- /com.doji.transformers/Editor.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 5082529ff3502a84db58e4a60e725035 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 82cda41f14b8b9b419109dc54a971706 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 84466b16e5ee9aa49bbc434e5cac49dd 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Third Party Notices.md.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 103356b0aee32224d9a0820896006da0 3 | TextScriptImporter: 4 | externalObjects: {} 5 | userData: 6 | 
assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: a885f675a34f39448aec4011c77ea49a 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 22eb84fcb44f12e42839017570918267 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Doji.Transformers.asmdef.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 792a551df4a0292469be78bbb192f6d6 3 | AssemblyDefinitionImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 29e1947105a1cc94f967af095a61e89c 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Unity.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 8e73c443bf402b24c924ed54bdc41027 3 | folderAsset: yes 4 | DefaultImporter: 5 | 
externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Utils.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 85a41bcca6d82e748bf281943f36a1e3 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/Resources.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: a6a395a4e34ea62418a701dbb16eeb1b 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 1449a477f1c2bbe4cbe8ba9160725f09 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 9086e417822ddc5418049acaa715cee4 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Auto.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 
9e47d1a77d8ea8d48b0e50d118e692e2 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Clip.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: a299a45c950fe2b4f8d9d36c0e72ff45 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/GPT2.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 9ea4f52de85a42b4486c0688387cb5fc 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Phi3.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ecd61570b32c0bd4d85c04591ba0bac4 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Editor/Doji.Transformers.Editor.asmdef.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 7a6c8e5f14dcfb54687e69e9da5e9337 3 | AssemblyDefinitionImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/Utils.meta: 
-------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: dbd4f4b1862dc4846acf819495ee9807 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Llama.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 929599dd5c4fd6a48913176a1ca45927 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Samples~/Doji.Transformers.Samples.asmdef.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 938c885529b64804f956233d3ba32d17 3 | AssemblyDefinitionImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/ConfigurationUtils.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: eeeb0d3648e4f674787ba99cac94d043 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/BeamSearch.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4b8c2a1403bca6442aed477d45cd3d88 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | 
-------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtils.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 14d227f1a0b4a7a4b8cdaaa6bef21443 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 6637bc58aa413974fb878af317554ef3 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Samples~/01-Phi3ForCausalLMSample.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 6e75bcd0d28c3bc4e97e815cde80ea7e 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/VersionControlSettings.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! 
tag:unity3d.com,2011: 3 | --- !u!890905787 &1 4 | VersionControlSettings: 5 | m_ObjectHideFlags: 0 6 | m_Mode: Visible Meta Files 7 | m_TrackPackagesOutsideProject: 0 8 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/LogitsProcess.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 570407d74f0347b449eaac017c1f17fd 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Samples~/01-Phi3ForCausalLMSample/Phi3ForCausalLMSample.unity.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 68d3ca225f8fbb54aa973ab6ad33f777 3 | DefaultImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/Doji.Transformers.Editor.Tests.asmdef.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2bef6e4a22d78984da44c22b365f928e 3 | AssemblyDefinitionImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/Resources/Phi-3-mini-4k-instruct/tokenizer.model.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 1b7235facc03fe446bf7dbf447af1292 3 | DefaultImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- 
/com.doji.transformers/Runtime/Scripts/Generation/ConfigurationUtils.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 067be3b5053ba874589d5017b2739418 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: dd83c37c84e9a494799d0391e5814543 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/Resources/Phi-3-mini-4k-instruct.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e8a9f7a415fee2c47ba91b7256c397b7 3 | folderAsset: yes 4 | DefaultImporter: 5 | externalObjects: {} 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/SlidingWindowCache.cs: -------------------------------------------------------------------------------- 1 | namespace Doji.AI.Transformers { 2 | public class SlidingWindowCache : StaticCache { 3 | public override void Reset() { 4 | throw new System.NotImplementedException(); 5 | } 6 | } 7 | } -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/EditorBuildSettings.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! 
tag:unity3d.com,2011: 3 | --- !u!1045 &1 4 | EditorBuildSettings: 5 | m_ObjectHideFlags: 0 6 | serializedVersion: 2 7 | m_Scenes: [] 8 | m_configObjects: {} 9 | m_UseUCBPForAssetBundles: 0 10 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/AssemblyInfo.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: a5437734edc3fcf45af7ac9546a0533f 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/TrieTest.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e24e9e36c0e3ad74ab1f1495b9040259 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Utils/Debug.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 7a94af4b988d4334fa91e7481c326954 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Utils/Kwargs.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: f0210c70a441b594da2c64bb4cf331f2 3 | MonoImporter: 4 | 
externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Utils/Log.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 6bec32746b6817f4e85f7d5f351227e5 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/Cache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3a81710ce5701b348b0e351ab0074a0f 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Clip/Vocab.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3de71716ff9bcf14286b932192f0cb77 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Utils/ModelOutput.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: fc884dd765496e54286a47b5f22b7621 3 | MonoImporter: 
4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/BasicTokenizerTest.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 13a0c8d19b6bd1c4aa66a4c2ab35d1b8 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/BytesToUnicodeTest.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ceb534661c4b7cd4aa499e215557c119 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/ClipTokenizerTest.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: cf574ef5dbf20c241905ba0ddcebe5a9 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/LLamaTokenizerTest.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 50415fcb05310ad4a80d066ab98fdba0 3 | 
MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/TokenizerConfigTest.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 62c0948235a9f7942a539ee97a532035 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/DynamicCache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 27f8968e31d24454c8b13d274c348fb1 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/HybridCache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c62c9c54a3d7df84cad793b25dc3e872 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/MambaCache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 
ba4e5e81d8c03ce44b67859fc852fbde 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/SinkCache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 89cb01f6d74ce97429815a18c01a06ee 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/StaticCache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ced94b7b4d8b8ce4987253a95e0b9bcf 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtils/Token.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 400af243cd2fe3d4d8c43f34d5738578 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtils/Trie.cs.meta: -------------------------------------------------------------------------------- 1 | 
fileFormatVersion: 2 2 | guid: 8bb7f3eb0c640ef45b5cda5276a837ad 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Utils/OrderedDictionary.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 82b6ad5e0a9bad740925594bda27d2ce 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/InputSerializationTest.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 346b3dcd83d8f0645981f2bbed667879 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/HQQQuantizedCache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 61fac4ebe77258a4d90ce29af4692f0f 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/OffloadedCache.cs.meta: 
-------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: b4282e409159a874790fb8002b5d0780 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/QuantizedCache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 1ce707cf113d7cf478706842dbb72165 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/ConfigurationUtils/PathUtils.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 94082089901b52c40bea238cfccb01c6 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Auto/AutoTokenizer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 0abfc11d81170b842bf8194a520a0cb3 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- 
/com.doji.transformers/Runtime/Scripts/Models/Clip/BasicTokenizer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 9df7122102420e645a2443902e8a4622 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Clip/ClipTokenizer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 6a616cad27a6db04580268b6d60c70a4 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/GPT2/GPT2Tokenizer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: bc8b7e19c2ce6aa43a639703b82f9741 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Llama/LlamaTokenizer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3750db36e7997b448a2b9a8375f9a67e 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | 
-------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Phi3/Phi3ForCausalLM.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: df86978afd511dc40889b76dfe8fa7b7 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/Input.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 2bd929174e237e34690cf64c4417adb2 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/EncoderDecoderCache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 7c2db6a98bc4a214fa2d09e76fdbe0a9 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/QuantizedCacheConfig.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4e890df5385526045b16676d3afc11e6 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | 
userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/QuantoQuantizedCache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 7ccfad3b13d8d7349be0da4ea0f06747 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/SlidingWindowCache.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 5051b195d9d8f3d46a0e6a3cb139a265 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/ConfigurationUtils/Configurable.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3c00464cd56f4be40bc1f39b538d35d4 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/Utils/GenerationMixin.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: bc8b69c087b42024185531c28d2030e3 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | 
defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Auto/AutoModelForCausalLM.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ae0a91e80d3172e4fb61b3b3a8b0cf9b 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtils/TokenString.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e8160b1649958cb408918fe5a084323f 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/AddedToken.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ec1550517c6b82b44900c5ebda285693 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/Encoding.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 8bc5abddd1b59e340a5f07d94c5620f2 3 | 
MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/ConfigurationUtils/PretrainedConfig.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: f996a37eb060d2b40a3427584acc55dd 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/ConfigurationUtils/PretrainedModel.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 98a131194766a3446a6c20c1a2e7f420 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/BeamSearch/BeamConstraints.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4ae17b122b496114a869c8381bc8bd44 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/BeamSearch/BeamSearchScorer.cs.meta: 
-------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 61b1d8ce92f8d7f49abe63bf06353d58 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtils/PreTrainedTokenizer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 9b3a7447f5d50724eab710c9507ab28b 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtils/TokenString.cs: -------------------------------------------------------------------------------- 1 | namespace Doji.AI.Transformers { 2 | 3 | public class TokenString : Token { 4 | 5 | public TokenString(string content) { 6 | Content = content; 7 | } 8 | 9 | public override string ToString() { 10 | return Content; 11 | } 12 | } 13 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtils/TokenizationUtils.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 31ba69c39e2e0ec49b0a87fb8da4cdff 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- 
/com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/BatchEncoding.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 924d64ce69da68846b7e4618e87aacf9 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/EncodingParams.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 584a28b129bff3940bf4f10d87bd9868 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/InputConverter.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c08f76d44b7b4874996c12cc6273947c 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/InputEncoding.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ff0d94c4fc89ff74cb387f5aac3d42fe 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 
12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/TokenizerConfig.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3178412ce57f70b40a273753efef56ca 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/LogitsProcess/LogitsProcessor.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: f010f62fdf0051142b1c0ab32d28f6aa 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/MaxTimeCriteria.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 9c63976f6c97b2646989e646b81c514e 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/Utils/GenerateDecoderOnlyOutput.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 6eb8dbd66d75e744394ddac38a0715bc 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 
| executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/ISpecialTokensMixin.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 4276b950140259644b51a1153c1a5cb6 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/SpecialTokensMixin.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: bb9ec8f58abafce4eab5db3b2d4cfe59 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Samples~/01-Phi3ForCausalLMSample/Phi3ForCausalLMSample.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 869f64df865bd75498fcd3f3b0736dbe 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/ConfigurationUtils/GenerationConfig.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 
0cd5a5e27a3e2a44f96672bdefd8198c 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/LogitsProcess/LogitsProcessorList.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 1dae6636a4bf32347856311d415047e6 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/EosTokenCriteria.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 7c14cfddfa0a9614a941588cb689fbfb 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/MaxLengthCriteria.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 81e6001a8b5c6f445afba4aca7783975 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- 
/com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/MaxNewTokensCriteria.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: a966c9df959d26e4fba12c36ab8d94bb 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/StopStringCriteria.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: b4adc7e946879f2409ed4b5494caff52 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/StoppingCriteria.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 021347ea16859df47863fa2137a838b1 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/StoppingCriteriaList.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 37b0102a080965e4d895a22814ea9c26 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 
| assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/PreTrainedTokenizerBase.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: d035bc9ac0ccb0b4bb37ef953a3732e2 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/BeamSearch/ConstrainedBeamSearchScorer.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 401a1dea7cd23f44eb602218ded95da6 3 | MonoImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | defaultReferences: [] 7 | executionOrder: 0 8 | icon: {instanceID: 0} 9 | userData: 10 | assetBundleName: 11 | assetBundleVariant: 12 | -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/TimeManager.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! 
tag:unity3d.com,2011: 3 | --- !u!5 &1 4 | TimeManager: 5 | m_ObjectHideFlags: 0 6 | serializedVersion: 2 7 | Fixed Timestep: 8 | m_Count: 2822399 9 | m_Rate: 10 | m_Denominator: 1 11 | m_Numerator: 141120000 12 | Maximum Allowed Timestep: 0.33333334 13 | m_TimeScale: 1 14 | Maximum Particle Timestep: 0.03 15 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/BeamSearch/BeamSearchScorer.cs: -------------------------------------------------------------------------------- 1 | namespace Doji.AI.Transformers { 2 | public class BeamSearchScorer { 3 | public BeamSearchScorer( 4 | int batchSize, 5 | int numBeams, 6 | float? lengthPenalty = 1.0f, 7 | StoppingCondition doEarlyStopping = StoppingCondition.False, 8 | int? numBeamHypsToKeep = 1, 9 | int? numBeamGroups = 1, 10 | int? maxLength = null) { } 11 | } 12 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/LogitsProcess/LogitsProcessorList.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Unity.Sentis; 3 | 4 | namespace Doji.AI.Transformers { 5 | public class LogitsProcessorList : List { 6 | public Tensor Apply(Tensor inputIds, Tensor scores) { 7 | foreach (var processor in this) { 8 | scores = processor.Apply(inputIds, scores); 9 | } 10 | return scores; 11 | } 12 | } 13 | } -------------------------------------------------------------------------------- /com.doji.transformers/Editor/Doji.Transformers.Editor.asmdef: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Doji.Transformers.Editor", 3 | "rootNamespace": "", 4 | "references": [ 5 | "GUID:792a551df4a0292469be78bbb192f6d6" 6 | ], 7 | "includePlatforms": [ 8 | "Editor" 9 | ], 10 | "excludePlatforms": [], 11 | "allowUnsafeCode": false, 12 | "overrideReferences": true, 13 | 
"precompiledReferences": [], 14 | "autoReferenced": true, 15 | "defineConstraints": [], 16 | "versionDefines": [], 17 | "noEngineReferences": false 18 | } -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/BasicTokenizerTest.cs: -------------------------------------------------------------------------------- 1 | using NUnit.Framework; 2 | using System.Collections.Generic; 3 | 4 | namespace Doji.AI.Transformers.Editor.Tests { 5 | 6 | public class BasicTokenizerTest { 7 | 8 | [Test] 9 | public void TestEncodeSimple() { 10 | BasicTokenizer tokenizer = new BasicTokenizer(); 11 | var tokens = tokenizer.Tokenize("a cat"); 12 | List expected = new List() { "a", "cat" }; 13 | CollectionAssert.AreEqual(expected, tokens); 14 | } 15 | 16 | } 17 | } -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/AudioManager.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! 
tag:unity3d.com,2011: 3 | --- !u!11 &1 4 | AudioManager: 5 | m_ObjectHideFlags: 0 6 | serializedVersion: 2 7 | m_Volume: 1 8 | Rolloff Scale: 1 9 | Doppler Factor: 1 10 | Default Speaker Mode: 2 11 | m_SampleRate: 0 12 | m_DSPBufferSize: 1024 13 | m_VirtualVoiceCount: 512 14 | m_RealVoiceCount: 32 15 | m_EnableOutputSuspension: 1 16 | m_SpatializerPlugin: 17 | m_AmbisonicDecoderPlugin: 18 | m_DisableAudio: 0 19 | m_VirtualizeEffects: 1 20 | m_RequestedDSPBufferSize: 0 21 | -------------------------------------------------------------------------------- /com.doji.transformers/Samples~/Doji.Transformers.Samples.asmdef: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Doji.Transformers.Samples", 3 | "rootNamespace": "", 4 | "references": [ 5 | "GUID:792a551df4a0292469be78bbb192f6d6", 6 | "GUID:c98377141161c7746a178fb5cb1af075" 7 | ], 8 | "includePlatforms": [], 9 | "excludePlatforms": [], 10 | "allowUnsafeCode": false, 11 | "overrideReferences": false, 12 | "precompiledReferences": [], 13 | "autoReferenced": true, 14 | "defineConstraints": [], 15 | "versionDefines": [], 16 | "noEngineReferences": false 17 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/BeamSearch/BeamConstraints.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Doji.AI.Transformers { 4 | /// 5 | /// Abstract base class for all constraints that can be applied during generation. 
6 | /// 7 | public abstract class Constraint { } 8 | public class DisjunctiveConstraint : Constraint { 9 | public DisjunctiveConstraint(List> wordIds) { } 10 | } 11 | 12 | public class PhrasalConstraint : Constraint { 13 | public PhrasalConstraint(List wordIds) { } 14 | } 15 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/BeamSearch/ConstrainedBeamSearchScorer.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | namespace Doji.AI.Transformers { 3 | public class ConstrainedBeamSearchScorer { 4 | public ConstrainedBeamSearchScorer( 5 | List constraints, 6 | int batchSize, 7 | int numBeams, 8 | float? lengthPenalty = 1.0f, 9 | StoppingCondition doEarlyStopping = StoppingCondition.False, 10 | int? numBeamHypsToKeep = 1, 11 | int? numBeamGroups = 1, 12 | int? maxLength = null) { } 13 | } 14 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/StoppingCriteria.cs: -------------------------------------------------------------------------------- 1 | using Unity.Sentis; 2 | 3 | namespace Doji.AI.Transformers { 4 | 5 | /// 6 | /// Abstract base class for all stopping criteria that can be applied during generation. 7 | /// Concrete criteria override Apply to signal, from the current input ids and scores, 8 | /// when generation should stop. 9 | /// 10 | public abstract class StoppingCriteria { 11 | internal Ops Ops { get; set; } 12 | public abstract Tensor Apply(Tensor inputIds, Tensor scores); 13 | } 14 | } -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/VFXManager.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! 
tag:unity3d.com,2011: 3 | --- !u!937362698 &1 4 | VFXManager: 5 | m_ObjectHideFlags: 0 6 | m_IndirectShader: {fileID: 0} 7 | m_CopyBufferShader: {fileID: 0} 8 | m_PrefixSumShader: {fileID: 0} 9 | m_SortShader: {fileID: 0} 10 | m_StripUpdateShader: {fileID: 0} 11 | m_EmptyShader: {fileID: 0} 12 | m_RenderPipeSettingsPath: 13 | m_FixedTimeStep: 0.016666668 14 | m_MaxDeltaTime: 0.05 15 | m_MaxScrubTime: 30 16 | m_MaxCapacity: 100000000 17 | m_CompiledVersion: 0 18 | m_RuntimeVersion: 0 19 | m_RuntimeResources: {fileID: 0} 20 | m_BatchEmptyLifetime: 300 21 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/SinkCache.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Unity.Sentis; 3 | 4 | namespace Doji.AI.Transformers { 5 | public class SinkCache : Cache { 6 | public override (Tensor Key, Tensor Value) this[int index] => throw new System.NotImplementedException(); 7 | 8 | public override IEnumerator<(Tensor Key, Tensor Value)> GetEnumerator() { 9 | throw new System.NotImplementedException(); 10 | } 11 | 12 | public override void Update(Tensor keyStates, Tensor valueStates, int layerIdx) { 13 | throw new System.NotImplementedException(); 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/TagManager.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! 
tag:unity3d.com,2011: 3 | --- !u!78 &1 4 | TagManager: 5 | serializedVersion: 3 6 | tags: [] 7 | layers: 8 | - Default 9 | - TransparentFX 10 | - Ignore Raycast 11 | - 12 | - Water 13 | - UI 14 | - 15 | - 16 | - 17 | - 18 | - 19 | - 20 | - 21 | - 22 | - 23 | - 24 | - 25 | - 26 | - 27 | - 28 | - 29 | - 30 | - 31 | - 32 | - 33 | - 34 | - 35 | - 36 | - 37 | - 38 | - 39 | - 40 | m_SortingLayers: 41 | - name: Default 42 | uniqueID: 0 43 | locked: 0 44 | m_RenderingLayers: 45 | - Default 46 | -------------------------------------------------------------------------------- /com.doji.transformers/Editor/TokenizerModelmporter.cs: -------------------------------------------------------------------------------- 1 | using UnityEditor.AssetImporters; 2 | using UnityEngine; 3 | using System.IO; 4 | 5 | namespace Doji.AI.Transformers.Editor { 6 | 7 | [ScriptedImporter(version: 1, ext: "model")] 8 | public class TokenizerModelImporter : ScriptedImporter { 9 | public override void OnImportAsset(AssetImportContext ctx) { 10 | byte[] bytes = File.ReadAllBytes(ctx.assetPath); 11 | var asset = ScriptableObject.CreateInstance(); 12 | asset.SetBytes(bytes); 13 | ctx.AddObjectToAsset("TokenizerModel", asset); 14 | ctx.SetMainObject(asset); 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/StopStringCriteria.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Unity.Sentis; 3 | 4 | namespace Doji.AI.Transformers { 5 | 6 | public class StopStringCriteria : StoppingCriteria { 7 | public PreTrainedTokenizerBase Tokenizer { get; } 8 | public List StopStrings { get; } 9 | public StopStringCriteria(PreTrainedTokenizerBase tokenizer, List stopStrings) { 10 | Tokenizer = tokenizer; 11 | StopStrings = stopStrings; 12 | } 13 | public override Tensor Apply(Tensor inputIds, Tensor scores) { 14 | throw 
new System.NotImplementedException(); 15 | } 16 | } 17 | } -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/Doji.Transformers.Editor.Tests.asmdef: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Doji.Transformers.Editor.Tests", 3 | "rootNamespace": "", 4 | "references": [ 5 | "UnityEngine.TestRunner", 6 | "UnityEditor.TestRunner", 7 | "Doji.Transformers" 8 | ], 9 | "includePlatforms": [ 10 | "Editor" 11 | ], 12 | "excludePlatforms": [], 13 | "allowUnsafeCode": false, 14 | "overrideReferences": true, 15 | "precompiledReferences": [ 16 | "nunit.framework.dll", 17 | "Newtonsoft.Json.dll" 18 | ], 19 | "autoReferenced": false, 20 | "defineConstraints": [ 21 | "UNITY_INCLUDE_TESTS" 22 | ], 23 | "versionDefines": [], 24 | "noEngineReferences": false 25 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Utils/Debug.cs: -------------------------------------------------------------------------------- 1 | #if UNITY_EDITOR || UNITY_STANDALONE || UNITY_ANDROID || UNITY_IOS || UNITY_WSA || UNITY_WEBGL || UNITY_LINUX 2 | #define UNITY 3 | #endif 4 | 5 | namespace Doji.AI.Transformers { 6 | 7 | internal static class Debug{ 8 | 9 | public static void Assert(bool condition) { 10 | #if UNITY 11 | UnityEngine.Debug.Assert(condition); 12 | #else 13 | System.Diagnostics.Debug.Assert(condition); 14 | #endif 15 | } 16 | public static void Assert(bool condition, string message) { 17 | #if UNITY 18 | UnityEngine.Debug.Assert(condition, message); 19 | #else 20 | System.Diagnostics.Debug.Assert(condition, message); 21 | #endif 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/MambaCache.cs: -------------------------------------------------------------------------------- 1 | using 
System.Collections.Generic; 2 | using Unity.Sentis; 3 | 4 | namespace Doji.AI.Transformers { 5 | 6 | public class MambaCache : Cache { 7 | public override (Tensor Key, Tensor Value) this[int index] => throw new System.NotImplementedException(); 8 | 9 | public override IEnumerator<(Tensor Key, Tensor Value)> GetEnumerator() { 10 | throw new System.NotImplementedException(); 11 | } 12 | 13 | public override void Reset() { 14 | throw new System.NotImplementedException(); 15 | } 16 | 17 | public override void Update(Tensor keyStates, Tensor valueStates, int layerIdx) { 18 | throw new System.NotImplementedException(); 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/HybridCache.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Unity.Sentis; 3 | 4 | namespace Doji.AI.Transformers { 5 | 6 | public class HybridCache : Cache { 7 | public override (Tensor Key, Tensor Value) this[int index] => throw new System.NotImplementedException(); 8 | 9 | public override IEnumerator<(Tensor Key, Tensor Value)> GetEnumerator() { 10 | throw new System.NotImplementedException(); 11 | } 12 | 13 | public override void Reset() { 14 | throw new System.NotImplementedException(); 15 | } 16 | 17 | public override void Update(Tensor keyStates, Tensor valueStates, int layerIdx) { 18 | throw new System.NotImplementedException(); 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/StaticCache.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Unity.Sentis; 3 | 4 | namespace Doji.AI.Transformers { 5 | 6 | public class StaticCache : Cache { 7 | public override (Tensor Key, Tensor Value) this[int index] => throw new 
System.NotImplementedException(); 8 | 9 | public override IEnumerator<(Tensor Key, Tensor Value)> GetEnumerator() { 10 | throw new System.NotImplementedException(); 11 | } 12 | 13 | public override void Reset() { 14 | throw new System.NotImplementedException(); 15 | } 16 | 17 | public override void Update(Tensor keyStates, Tensor valueStates, int layerIdx) { 18 | throw new System.NotImplementedException(); 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Unity/TokenizerModelAsset.cs: -------------------------------------------------------------------------------- 1 | using UnityEngine; 2 | 3 | namespace Doji.AI.Transformers { 4 | 5 | /// 6 | /// A custom asset type that contains the raw data from tokenizer.model files. 7 | /// This is needed so that tokenizer models can be placed in Resources folders 8 | /// (without actually deserializing the protocol buffer) and be included in a build 9 | /// for passing to the tokenizer implementation via path. 
10 | /// 11 | public class TokenizerModelAsset : ScriptableObject { 12 | [SerializeField, HideInInspector] 13 | private byte[] modelData; 14 | 15 | public byte[] ModelData => modelData; 16 | 17 | public void SetBytes(byte[] data) { 18 | modelData = data; 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Doji.Transformers.asmdef: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Doji.Transformers", 3 | "rootNamespace": "", 4 | "references": [ 5 | "GUID:c98377141161c7746a178fb5cb1af075", 6 | "GUID:a6383e49882b5c445bf832fb8f773642", 7 | "GUID:cf0ac4207a68fac439ea28cb5bfbf97e" 8 | ], 9 | "includePlatforms": [], 10 | "excludePlatforms": [], 11 | "allowUnsafeCode": false, 12 | "overrideReferences": true, 13 | "precompiledReferences": [ 14 | "Microsoft.ML.Tokenizers.dll", 15 | "Newtonsoft.Json.dll" 16 | ], 17 | "autoReferenced": true, 18 | "defineConstraints": [], 19 | "versionDefines": [ 20 | { 21 | "name": "Unity", 22 | "expression": "", 23 | "define": "LOG_ERRORS" 24 | } 25 | ], 26 | "noEngineReferences": false 27 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/ConfigurationUtils/PathUtils.cs: -------------------------------------------------------------------------------- 1 | using System.IO; 2 | using UnityEngine; 3 | 4 | namespace Doji.AI.Transformers { 5 | 6 | public static class PathUtils { 7 | 8 | public static string StreamingAssetsPath(this string subPath) { 9 | return Path.Combine(Application.streamingAssetsPath, subPath); 10 | } 11 | 12 | public static string ResourcePath(this string subPath) { 13 | return Path.ChangeExtension(subPath, null); 14 | } 15 | 16 | public static string StreamingAssetsPathForModel(this string subPath, string modelFileName) { 17 | return Path.Combine(Application.streamingAssetsPath, subPath, $"{modelFileName}.sentis"); 18 | 
} 19 | 20 | public static string ResourcePathForModel(this string subPath, string modelFileName) { 21 | return Path.Combine(subPath, $"{modelFileName}"); 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /com.doji.transformers/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "com.doji.transformers", 3 | "version": "0.1.5", 4 | "displayName": "Transformers", 5 | "description": "State-of-the-art transformer models to perform tasks on different modalities such as text, vision, and audio with Unity Sentis", 6 | "dependencies": { 7 | "com.unity.sentis": "2.1.2", 8 | "com.unity.nuget.newtonsoft-json": "3.2.1", 9 | "com.doji.sentencepiece": "0.1.1", 10 | "com.doji.sentis-utils": "0.1.0" 11 | }, 12 | "author": { 13 | "name": "Doji Technologies", 14 | "email": "support@doji-tech.com", 15 | "url": "https://www.doji-tech.com/" 16 | }, 17 | "documentationUrl": "https://docs.doji-tech.com/com.doji.transformers/", 18 | "samples": [ 19 | { 20 | "displayName": "Phi-3 Mini Sample", 21 | "description": "Basic example on generating text with a Phi-3 Mini-4K-Instruct model.", 22 | "path": "Samples~/01-Phi3ForCausalLMSample" 23 | } 24 | ] 25 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/StoppingCriteriaList.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Unity.Sentis; 3 | 4 | namespace Doji.AI.Transformers { 5 | public class StoppingCriteriaList : List { 6 | 7 | private Ops _ops; 8 | 9 | public StoppingCriteriaList(Ops ops) { 10 | _ops = ops; 11 | } 12 | 13 | public Tensor Apply(Tensor inputIds, Tensor scores) { 14 | Tensor isDone = _ops.Zeros(new TensorShape(inputIds.shape[0])); 15 | foreach (var criteria in this) { 16 | isDone = _ops.Or(isDone, criteria.Apply(inputIds, scores)); 17 
| } 18 | return isDone; 19 | } 20 | 21 | public new void Add(StoppingCriteria criteria) { 22 | if (criteria != null) { 23 | criteria.Ops = _ops; 24 | } 25 | base.Add(criteria); 26 | } 27 | } 28 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023-2025 Julien Kipp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/EncoderDecoderCache.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Unity.Sentis; 3 | 4 | namespace Doji.AI.Transformers { 5 | 6 | public class EncoderDecoderCache : Cache { 7 | 8 | public Cache SelfAttentionCache { get; private set; } 9 | public Cache CrossAttentionCache { get; private set; } 10 | 11 | public override (Tensor Key, Tensor Value) this[int index] => throw new System.NotImplementedException(); 12 | 13 | public EncoderDecoderCache(Cache selfAttentionCache, Cache crossAttentionCache) { 14 | SelfAttentionCache = selfAttentionCache; 15 | CrossAttentionCache = crossAttentionCache; 16 | } 17 | 18 | public override void Reset() { 19 | throw new System.NotImplementedException(); 20 | } 21 | 22 | public override void Update(Tensor keyStates, Tensor valueStates, int layerIdx) { 23 | throw new System.NotImplementedException(); 24 | } 25 | 26 | public override IEnumerator<(Tensor Key, Tensor Value)> GetEnumerator() { 27 | throw new System.NotImplementedException(); 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/InputEncoding.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Doji.AI.Transformers { 4 | 5 | /// 6 | /// Represents the encoded output for a single text input. 
7 | /// 8 | public class InputEncoding : Encoding { 9 | 10 | public InputEncoding() : base() { } 11 | public InputEncoding(Dictionary dict) : base(dict) { } 12 | 13 | public override IEnumerable InputIds { 14 | get { 15 | TryGetValue("input_ids", out var inputIds); 16 | return inputIds as IEnumerable; 17 | } 18 | } 19 | 20 | public override IEnumerable AttentionMask { 21 | get { 22 | TryGetValue("attention_mask", out var inputIds); 23 | return inputIds as IEnumerable; 24 | } 25 | } 26 | 27 | public override IEnumerable TokenTypeIds { 28 | get { 29 | TryGetValue("token_type_ids", out var inputIds); 30 | return inputIds as IEnumerable; 31 | } 32 | } 33 | } 34 | } -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/PackageManagerSettings.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! tag:unity3d.com,2011: 3 | --- !u!114 &1 4 | MonoBehaviour: 5 | m_ObjectHideFlags: 53 6 | m_CorrespondingSourceObject: {fileID: 0} 7 | m_PrefabInstance: {fileID: 0} 8 | m_PrefabAsset: {fileID: 0} 9 | m_GameObject: {fileID: 0} 10 | m_Enabled: 1 11 | m_EditorHideFlags: 0 12 | m_Script: {fileID: 13964, guid: 0000000000000000e000000000000000, type: 0} 13 | m_Name: 14 | m_EditorClassIdentifier: 15 | m_EnablePreReleasePackages: 0 16 | m_AdvancedSettingsExpanded: 1 17 | m_ScopedRegistriesSettingsExpanded: 1 18 | m_SeeAllPackageVersions: 0 19 | m_DismissPreviewPackagesInUse: 0 20 | oneTimeWarningShown: 0 21 | oneTimeDeprecatedPopUpShown: 0 22 | m_Registries: 23 | - m_Id: main 24 | m_Name: 25 | m_Url: https://packages.unity.com 26 | m_Scopes: [] 27 | m_IsDefault: 1 28 | m_Capabilities: 7 29 | m_ConfigSource: 0 30 | m_UserSelectedRegistryName: 31 | m_UserAddingNewScopedRegistry: 0 32 | m_RegistryInfoDraft: 33 | m_Modified: 0 34 | m_ErrorMessage: 35 | m_UserModificationsInstanceId: -908 36 | m_OriginalInstanceId: -910 37 | m_LoadAssets: 0 38 | 
-------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/UnityConnectSettings.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! tag:unity3d.com,2011: 3 | --- !u!310 &1 4 | UnityConnectSettings: 5 | m_ObjectHideFlags: 0 6 | serializedVersion: 1 7 | m_Enabled: 0 8 | m_TestMode: 0 9 | m_EventOldUrl: https://api.uca.cloud.unity3d.com/v1/events 10 | m_EventUrl: https://cdp.cloud.unity3d.com/v1/events 11 | m_ConfigUrl: https://config.uca.cloud.unity3d.com 12 | m_DashboardUrl: https://dashboard.unity3d.com 13 | m_TestInitMode: 0 14 | CrashReportingSettings: 15 | serializedVersion: 2 16 | m_EventUrl: https://perf-events.cloud.unity3d.com 17 | m_EnableCloudDiagnosticsReporting: 0 18 | m_LogBufferSize: 10 19 | m_CaptureEditorExceptions: 1 20 | UnityPurchasingSettings: 21 | m_Enabled: 0 22 | m_TestMode: 0 23 | UnityAnalyticsSettings: 24 | m_Enabled: 0 25 | m_TestMode: 0 26 | m_InitializeOnStartup: 1 27 | m_PackageRequiringCoreStatsPresent: 0 28 | UnityAdsSettings: 29 | m_Enabled: 0 30 | m_InitializeOnStartup: 1 31 | m_TestMode: 0 32 | m_IosGameId: 33 | m_AndroidGameId: 34 | m_GameIds: {} 35 | m_GameId: 36 | PerformanceReportingSettings: 37 | m_Enabled: 0 38 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/EosTokenCriteria.cs: -------------------------------------------------------------------------------- 1 | using Unity.Sentis; 2 | 3 | namespace Doji.AI.Transformers { 4 | 5 | /// 6 | /// This class can be used to stop generation whenever the "end-of-sequence" token is generated. 7 | /// By default, it uses the `model.generation_config.eos_token_id`. 8 | /// 9 | public class EosTokenCriteria : StoppingCriteria { 10 | 11 | /// 12 | /// The id(s) of the *end-of-sequence* token. 
13 | /// 14 | public int[] EosTokenId { get; } 15 | 16 | public EosTokenCriteria(int[] eosTokenId) { 17 | EosTokenId = eosTokenId; 18 | } 19 | 20 | public override Tensor Apply(Tensor inputIds, Tensor scores) { 21 | Tensor isDone = Ops.Zeros(new TensorShape(inputIds.shape[0])); 22 | Tensor lastTokenInputs = Ops.Slice(inputIds, .., ^1); 23 | foreach (int eosToken in EosTokenId) { 24 | Tensor eos = Ops.NewTensor(eosToken); 25 | isDone = Ops.Or(isDone, Ops.Equal(lastTokenInputs, eos)); 26 | } 27 | return isDone; 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Utils/Log.cs: -------------------------------------------------------------------------------- 1 | #if UNITY_EDITOR || UNITY_STANDALONE || UNITY_ANDROID || UNITY_IOS || UNITY_WSA || UNITY_WEBGL || UNITY_LINUX 2 | #define UNITY 3 | #endif 4 | 5 | using System.Diagnostics; 6 | 7 | namespace Doji.AI.Transformers { 8 | 9 | public static class Log{ 10 | 11 | [Conditional("LOG_INFO")] 12 | public static void Info(string message) { 13 | #if UNITY 14 | UnityEngine.Debug.Log(message); 15 | #else 16 | System.Console.WriteLine(message); 17 | #endif 18 | } 19 | 20 | [Conditional("LOG_INFO")] 21 | public static void Info(object message) { 22 | #if UNITY 23 | UnityEngine.Debug.Log(message); 24 | #else 25 | System.Console.WriteLine(message); 26 | #endif 27 | } 28 | 29 | public static void Warning(string message) { 30 | #if UNITY 31 | UnityEngine.Debug.LogWarning(message); 32 | #else 33 | System.Console.WriteLine(message); 34 | #endif 35 | } 36 | 37 | public static void Error(string message) { 38 | #if UNITY 39 | UnityEngine.Debug.LogError(message); 40 | #else 41 | System.Console.WriteLine(message); 42 | #endif 43 | } 44 | } 45 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Utils/ModelOutput.cs: 
-------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using Unity.Sentis; 3 | 4 | namespace Doji.AI.Transformers { 5 | 6 | /// 7 | /// Base class for all model outputs as dataclass. Has a method that allows 8 | /// indexing by strings (like a dictionary) that will ignore the null attributes. 9 | /// Otherwise behaves like a regular dictionary. 10 | /// 11 | public abstract class ModelOutput : Dictionary { 12 | public ModelOutput() : base() { } 13 | public T Get(string key, T defaultValue = default) { 14 | if (TryGetValue(key, out object value)) { 15 | return (T)value; 16 | } 17 | return defaultValue; 18 | } 19 | public object Get(string key, object defaultValue = null) { 20 | return this.GetValueOrDefault(key, defaultValue); 21 | } 22 | } 23 | public class CausalLMOutputWithPast : ModelOutput { 24 | public Tensor Logits { get; } 25 | public CausalLMOutputWithPast(Tensor logits) : base() { 26 | Logits = logits; 27 | Add("logits", logits); 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /com.doji.transformers/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 6 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
7 | 8 | ## [0.1.5] - 2025-04-10 9 | 10 | ### Fixed 11 | 12 | - Improved protocol buffer based tokenizer.model handling, which fixes issues preventing the package from being used at runtime 13 | 14 | ## [0.1.4] - 2025-04-04 15 | 16 | ## [0.1.3] - 2025-04-04 17 | 18 | ### Changed 19 | 20 | - Changed the dependency version of com.doji.sentencepiece from 0.0.0 to 0.1.0 21 | 22 | ## [0.1.2] - 2025-04-01 23 | 24 | ## [0.1.1] - 2024-08-16 25 | 26 | ### Fixed 27 | 28 | - Fixed missing dependency in package manifest 29 | 30 | ## [0.1.0] - 2024-08-16 31 | 32 | ### Added 33 | 34 | - LlamaTokenizer 35 | - GPT2Tokenizer 36 | - A (preliminary) implementation for running Phi-3 Mini models 37 | 38 | ## [0.0.2] - 2024-04-10 39 | 40 | ### Fixed 41 | 42 | - Fixed issue where ClipTokenizer initialization fails with ArgumentOutOfRangeException 43 | 44 | ## [0.0.1] - 2024-03-24 45 | 46 | ### Added 47 | 48 | - Custom JsonConverter for tokenizer inputs 49 | - Custom JsonConverter for vocab files 50 | 51 | ## [0.0.0] - 2024-01-29 52 | 53 | - Initial Release 54 | - Supports CLIP tokenization -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Generation/StoppingCriteria/MaxTimeCriteria.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using Unity.Sentis; 3 | 4 | namespace Doji.AI.Transformers { 5 | 6 | /// 7 | /// This class can be used to stop generation whenever the full generation exceeds some amount of time. 8 | /// By default, the time will start being counted when this criteria is constructed. 9 | /// You can override this by passing an initialTimestamp. 10 | /// 11 | public class MaxTimeCriteria : StoppingCriteria { 12 | 13 | /// 14 | /// The maximum allowed time in seconds for the generation. 15 | /// 16 | public float MaxTime { get; } 17 | 18 | /// 19 | /// The start of the generation allowed time. 
20 | /// 21 | public DateTime InitialTimestamp { get; } 22 | 23 | public MaxTimeCriteria(float maxTime, DateTime? initialTimestamp = null) { 24 | MaxTime = maxTime; 25 | InitialTimestamp = initialTimestamp ?? DateTime.UtcNow; 26 | } 27 | 28 | public override Tensor Apply(Tensor inputIds, Tensor scores) { 29 | bool isDone = (DateTime.UtcNow - InitialTimestamp).TotalSeconds > MaxTime; 30 | return Ops.Full(inputIds.shape[0], isDone); 31 | } 32 | } 33 | } -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/MemorySettings.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! tag:unity3d.com,2011: 3 | --- !u!387306366 &1 4 | MemorySettings: 5 | m_ObjectHideFlags: 0 6 | m_EditorMemorySettings: 7 | m_MainAllocatorBlockSize: -1 8 | m_ThreadAllocatorBlockSize: -1 9 | m_MainGfxBlockSize: -1 10 | m_ThreadGfxBlockSize: -1 11 | m_CacheBlockSize: -1 12 | m_TypetreeBlockSize: -1 13 | m_ProfilerBlockSize: -1 14 | m_ProfilerEditorBlockSize: -1 15 | m_BucketAllocatorGranularity: -1 16 | m_BucketAllocatorBucketsCount: -1 17 | m_BucketAllocatorBlockSize: -1 18 | m_BucketAllocatorBlockCount: -1 19 | m_ProfilerBucketAllocatorGranularity: -1 20 | m_ProfilerBucketAllocatorBucketsCount: -1 21 | m_ProfilerBucketAllocatorBlockSize: -1 22 | m_ProfilerBucketAllocatorBlockCount: -1 23 | m_TempAllocatorSizeMain: -1 24 | m_JobTempAllocatorBlockSize: -1 25 | m_BackgroundJobTempAllocatorBlockSize: -1 26 | m_JobTempAllocatorReducedBlockSize: -1 27 | m_TempAllocatorSizeGIBakingWorker: -1 28 | m_TempAllocatorSizeNavMeshWorker: -1 29 | m_TempAllocatorSizeAudioWorker: -1 30 | m_TempAllocatorSizeCloudWorker: -1 31 | m_TempAllocatorSizeGfx: -1 32 | m_TempAllocatorSizeJobWorker: -1 33 | m_TempAllocatorSizeBackgroundWorker: -1 34 | m_TempAllocatorSizePreloadManager: -1 35 | m_PlatformMemorySettings: {} 36 | 
using System;
using System.Collections;
using System.Collections.Generic;
using Unity.Sentis;

namespace Doji.AI.Transformers {

    /// <summary>
    /// Abstract base for key/value caches. A cache is indexable and enumerable,
    /// yielding one (Key, Value) tensor pair per entry as supplied by the subclass.
    /// </summary>
    public abstract class Cache : IEnumerable<(Tensor Key, Tensor Value)> {

        public int? MaxBatchSize { get; private set; }
        public int? MaxCacheLen { get; private set; }

        internal Ops Ops { get; set; }

        /// <summary>
        /// Gets the cached (Key, Value) pair at <paramref name="index"/>.
        /// </summary>
        public abstract (Tensor Key, Tensor Value) this[int index] { get; }

        /// <summary>
        /// Updates the cache with the new <paramref name="keyStates"/> and
        /// <paramref name="valueStates"/> for the layer <paramref name="layerIdx"/>.
        /// </summary>
        public abstract void Update(Tensor keyStates, Tensor valueStates, int layerIdx);

        /// <summary>
        /// Returns the sequence length of the cached states.
        /// The base implementation always throws; subclasses are expected to override it.
        /// </summary>
        /// <exception cref="NotImplementedException">Thrown when the subclass does not override this method.</exception>
        public virtual int GetSeqLength(int? layerIdx = 0) =>
            throw new NotImplementedException($"Make sure to implement {nameof(GetSeqLength)} in subclass '{GetType()}'.");

        /// <summary>
        /// Hook for clearing cached state; does nothing in the base class.
        /// </summary>
        public virtual void Reset() { }

        public abstract IEnumerator<(Tensor Key, Tensor Value)> GetEnumerator();

        // Non-generic enumeration simply forwards to the typed enumerator.
        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
    }
}
9 | /// 10 | public class MaxNewTokensCriteria : StoppingCriteria { 11 | 12 | /// 13 | /// The number of initial tokens. 14 | /// 15 | public int StartLength { get; } 16 | 17 | /// 18 | /// The maximum number of tokens to generate. 19 | /// 20 | public int MaxNewTokens { get; } 21 | 22 | public int MaxLength { get; } 23 | 24 | public MaxNewTokensCriteria(int startLength, int maxNewTokens) { 25 | StartLength = startLength; 26 | MaxNewTokens = maxNewTokens; 27 | MaxLength = startLength + maxNewTokens; 28 | } 29 | 30 | public override Tensor Apply(Tensor inputIds, Tensor scores) { 31 | bool isDone = inputIds.shape[-1] >= MaxLength; 32 | return Ops.Full(inputIds.shape[0], isDone); 33 | } 34 | } 35 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Clip/Vocab.cs: -------------------------------------------------------------------------------- 1 | using Newtonsoft.Json; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | 6 | namespace Doji.AI.Transformers { 7 | 8 | [JsonConverter(typeof(VocabConverter))] 9 | public class Vocab { 10 | 11 | public Dictionary Encoder { get; set; } 12 | public Dictionary Decoder { get; set; } 13 | 14 | public Vocab(Dictionary encoder) { 15 | Encoder = encoder; 16 | Decoder = encoder.ToDictionary(x => x.Value, x => x.Key); 17 | } 18 | 19 | public static Vocab Deserialize(string json) { 20 | Dictionary vocabEntries = JsonConvert.DeserializeObject>(json); 21 | return new Vocab(vocabEntries); 22 | } 23 | } 24 | 25 | public class VocabConverter : JsonConverter { 26 | public override Vocab ReadJson(JsonReader reader, Type objectType, Vocab existingValue, bool hasExistingValue, JsonSerializer serializer) { 27 | var vocabEntries = serializer.Deserialize>(reader); 28 | return new Vocab(vocabEntries); 29 | } 30 | public override void WriteJson(JsonWriter writer, Vocab value, JsonSerializer serializer) { 31 | var vocabEntries = value.Encoder; 
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;

namespace Doji.AI.Transformers {

    /// <summary>
    /// Provides similar functionality &amp; method names like python kwargs
    /// to simplify porting code from Python -> C#.
    /// </summary>
    public class Kwargs : Dictionary<string, object>, ICollection, IDictionary {

        public Kwargs(IEnumerable<KeyValuePair<string, object>> collection) : base(collection) { }
        public Kwargs() : base() { }

        /// <summary>
        /// Returns the value stored under <paramref name="key"/>, or
        /// <paramref name="defaultValue"/> when the key is not present.
        /// </summary>
        public object Get(string key, object defaultValue = null) {
            return this.GetValueOrDefault(key, defaultValue);
        }

        /// <summary>
        /// Returns the value stored under <paramref name="key"/> cast to <typeparamref name="T"/>,
        /// or <paramref name="defaultValue"/> when the key is not present.
        /// </summary>
        /// <remarks>
        /// A stored <c>null</c> cannot be unboxed into a non-nullable value type, so in that case
        /// <paramref name="defaultValue"/> is returned instead of throwing. For reference types and
        /// <see cref="Nullable{T}"/> a stored <c>null</c> is still returned as <c>null</c>.
        /// </remarks>
        /// <exception cref="InvalidCastException">The stored value is not a <typeparamref name="T"/>.</exception>
        public T Get<T>(string key, T defaultValue = default) {
            if (!TryGetValue(key, out object value)) {
                return defaultValue;
            }
            // default(T) != null is only true for non-nullable value types,
            // which is exactly the case where (T)null would throw.
            if (value == null && default(T) != null) {
                return defaultValue;
            }
            return (T)value;
        }

        /// <summary>
        /// Removes <paramref name="key"/> and returns its value, or returns
        /// <paramref name="defaultVal"/> (leaving the dictionary untouched) when the key is not present.
        /// </summary>
        public object Pop(string key, object defaultVal = default) {
            // Single lookup: Remove reports whether the key existed and hands back its value.
            return Remove(key, out object val) ? val : defaultVal;
        }

        /// <summary>
        /// Returns a new <see cref="Kwargs"/> containing only the entries that satisfy
        /// <paramref name="predicate"/>. This instance is not modified.
        /// </summary>
        public Kwargs Where(Func<KeyValuePair<string, object>, bool> predicate) {
            return new Kwargs(this.AsEnumerable().Where(predicate));
        }
    }
}
using Newtonsoft.Json;

namespace Doji.AI.Transformers {

    /// <summary>
    /// AddedToken represents a token to be added to a Tokenizer.
    /// An AddedToken can have special options defining the way it should behave.
    /// The `normalized` will default to `not special` if it is not specified,
    /// similarly to the definition in `tokenizers`.
    /// </summary>
    public class AddedToken : Token {

        [JsonProperty("single_word")]
        public bool SingleWord { get; set; }

        [JsonProperty("lstrip")]
        public bool Lstrip { get; set; }

        [JsonProperty("rstrip")]
        public bool Rstrip { get; set; }

        // Explicit lowercase name so this option is written with the same key style
        // as the other options; reading "special" from JSON works as before.
        [JsonProperty("special")]
        public bool Special { get; set; }

        [JsonProperty("normalized")]
        public bool Normalized { get; set; }

        [JsonProperty("__type")]
        public string Type { get; set; }

        /// <summary>
        /// Creates a token with the given content and options.
        /// </summary>
        /// <param name="content">Text of the token.</param>
        /// <param name="singleWord">Value for <see cref="SingleWord"/>.</param>
        /// <param name="lstrip">Value for <see cref="Lstrip"/>.</param>
        /// <param name="rstrip">Value for <see cref="Rstrip"/>.</param>
        /// <param name="special">Value for <see cref="Special"/>.</param>
        /// <param name="normalized">
        /// Value for <see cref="Normalized"/>; when <c>null</c> it is set to the
        /// negation of <paramref name="special"/>.
        /// </param>
        public AddedToken(
            string content,
            bool singleWord = false,
            bool lstrip = false,
            bool rstrip = false,
            bool special = false,
            bool? normalized = null)
        {
            Content = content;
            SingleWord = singleWord;
            Lstrip = lstrip;
            Rstrip = rstrip;
            Special = special;
            Normalized = normalized ?? !special;
        }
    }
}
using Newtonsoft.Json;
using System.Collections.Generic;

namespace Doji.AI.Transformers {

    /// <summary>
    /// Base class for all configuration classes. Handles a few parameters common to all models' configurations
    /// as well as methods for loading configurations.
    /// </summary>
    public class PretrainedConfig {

        // ---- Common attributes (present in all subclasses) ----

        [JsonProperty("vocab_size")]
        public int VocabSize { get; set; }

        [JsonProperty("hidden_size")]
        public int HiddenSize { get; set; }

        [JsonProperty("num_attention_heads")]
        public int NumAttentionHeads { get; set; }

        [JsonProperty("num_hidden_layers")]
        public int NumHiddenLayers { get; set; }

        [JsonProperty("_name_or_path")]
        public string NameOrPath { get; set; }

        [JsonProperty("architectures")]
        public List<string> Architectures { get; set; }

        /// <summary>Defaults to <c>false</c> for a freshly constructed config.</summary>
        [JsonProperty("is_encoder_decoder")]
        public bool IsEncoderDecoder { get; set; } = false;

        // ---- Not part of the base config class, but still convenient to have here ----

        [JsonProperty("sliding_window")]
        public int? SlidingWindow { get; set; }

        [JsonProperty("max_position_embeddings")]
        public int? MaxPositionEmbeddings { get; set; }

        /// <summary>
        /// Creates a config with every property at its default value.
        /// </summary>
        public PretrainedConfig() { }
    }
}
using System.Linq;
using Unity.Sentis;
using UnityEngine;

namespace Doji.AI.Transformers.Samples {

    // before running this sample, go to https://huggingface.co/julienkay/Phi-3-mini-4k-instruct_no_cache_uint8
    // download all the files and place them in StreamingAssets/julienkay/Phi-3-mini-4k-instruct_no_cache_uint8

    /// <summary>
    /// Sample that loads a tokenizer and a Phi-3 model, generates up to 20 new tokens
    /// for a fixed prompt on <c>Start</c>, and logs the decoded text.
    /// </summary>
    public class Phi3ForCausalLMSample : MonoBehaviour {

        private PreTrainedTokenizerBase _tokenizer;
        private Phi3ForCausalLM _model;

        private void Start() {
            string modelId = "julienkay/Phi-3-mini-4k-instruct_no_cache_uint8";
            var prompt = "<|user|>\nCan you provide ways to eat combinations of bananas and dragonfruits?<|end|>\n<|assistant|>\n";
            _tokenizer = AutoTokenizer.FromPretrained(modelId);
            _model = Phi3ForCausalLM.FromPretrained(modelId);

            var encodings = _tokenizer.Encode(prompt);
            var inputIds = encodings.InputIds.ToArray();

            // Shape (1, tokenCount): a single prompt.
            using var inputTensor = new Tensor<int>(new TensorShape(1, inputIds.Length), inputIds);
            _model.GenerationConfig.MaxNewTokens = 20;
            var result = _model.Generate(inputTensor);
            var seq = result.Get<Tensor<int>>("sequences");

            // ReadbackAndClone returns a new tensor; dispose it once the ids are downloaded.
            // NOTE(review): 'seq' itself is left alone — presumably owned by the model/result; confirm.
            using var cpuSeq = seq.ReadbackAndClone();
            string output = _tokenizer.Decode(cpuSeq.DownloadToArray().ToList(), skipSpecialTokens: true, cleanUpTokenizationSpaces: false);
            Debug.Log(output);
        }

        private void OnDestroy() {
            // _model is still null if Start threw before the model was loaded.
            _model?.Dispose();
            _model = null;
        }
    }
}
using System.Collections.Generic;

namespace Doji.AI.Transformers {

    /// <summary>
    /// A dictionary that holds the output of the tokenizer's encoding methods
    /// (tokens, attention_masks, etc).
    /// </summary>
    // NOTE(review): generic type arguments on Dictionary / IEnumerable are not visible
    // in this view of the file — confirm them against the real source.
    public abstract class Encoding : Dictionary {

        /// <summary>
        /// Indices of input sequence tokens in the vocabulary.
        /// These are numerical representations of tokens that will be used as the main input by most models.
        /// </summary>
        public abstract IEnumerable InputIds { get; }

        /// <summary>
        /// Mask to avoid performing attention on padding token indices. Mask values selected in [0, 1].
        /// Only valid if 'returnAttentionMask = true' was passed to Encode() method.
        /// </summary>
        public abstract IEnumerable AttentionMask { get; }

        /// <summary>
        /// Segment token indices to indicate first and second portions of the inputs. Indices are selected in [0, 1].
        /// Only valid if 'returnTokenTypeIds = true' was passed to Encode() method.
        /// </summary>
        public abstract IEnumerable TokenTypeIds { get; }

        internal int NumTruncatedTokens { get; set; }
        internal int Length { get; set; }

        public bool PrependBatchAxis { get; set;}
        public int? NSequences { get; set; }

        /// <summary>Creates an empty encoding.</summary>
        public Encoding() : base() { }

        /// <summary>Creates an encoding pre-populated with the entries of <paramref name="dict"/>.</summary>
        protected Encoding(Dictionary dict) : base(dict) { }
    }
}
using Unity.Sentis;

namespace Doji.AI.Transformers {

    /// <summary>
    /// This class can be used to stop generation whenever the full generated number of tokens
    /// exceeds <see cref="MaxLength"/>. Keep in mind for decoder-only type of transformers,
    /// this will include the initial prompted tokens.
    /// </summary>
    public class MaxLengthCriteria : StoppingCriteria {

        /// <summary>
        /// The maximum length that the output sequence can have in number of tokens.
        /// </summary>
        public int MaxLength { get; }

        /// <summary>
        /// The maximum model length, as defined by the model's `config.max_position_embeddings` attribute.
        /// </summary>
        public int? MaxPositionEmbeddings { get; }

        // Set once the "exceeds max position embeddings" warning has been logged, so that
        // the same message is not repeated on every subsequent Apply call.
        private bool _warnedMaxPositionEmbeddings;

        public MaxLengthCriteria(int maxLength, int? maxPositionEmbeddings) {
            MaxLength = maxLength;
            MaxPositionEmbeddings = maxPositionEmbeddings;
        }

        /// <summary>
        /// Reports "done" once the last dimension of <paramref name="inputIds"/> has reached
        /// <see cref="MaxLength"/>. The result is filled with that single flag and sized by
        /// the first dimension of <paramref name="inputIds"/>.
        /// </summary>
        /// <remarks>
        /// While not done, a warning is logged the first time the current length reaches
        /// <see cref="MaxPositionEmbeddings"/> (when that limit is set).
        /// </remarks>
        public override Tensor Apply(Tensor inputIds, Tensor scores) {
            int curLen = inputIds.shape[-1];
            bool isDone = curLen >= MaxLength;

            bool overModelLimit = MaxPositionEmbeddings.HasValue && curLen >= MaxPositionEmbeddings.Value;
            if (!isDone && overModelLimit && !_warnedMaxPositionEmbeddings) {
                _warnedMaxPositionEmbeddings = true;
                Log.Warning("This is a friendly reminder - the current text generation call will exceed the model's predefined " +
                    $"maximum length ({MaxPositionEmbeddings}). Depending on the model, you may observe " +
                    "exceptions, performance degradation, or nothing at all."
                );
            }
            return Ops.Full(inputIds.shape[0], isDone);
        }
    }
}
using Newtonsoft.Json.Linq;
using Newtonsoft.Json;
using System;

namespace Doji.AI.Transformers {

    /// <summary>
    /// A token identified by its text <see cref="Content"/>. Converts implicitly to and from <see cref="string"/>.
    /// </summary>
    public class Token {

        [JsonProperty("content")]
        public string Content { get; set; }

        // A null Token converts to a null string instead of throwing.
        public static implicit operator string(Token t) => t?.Content;
        public static implicit operator Token(string s) => new TokenString(s);

        public override string ToString() {
            return Content;
        }
    }

    /// <summary>
    /// Reads a <see cref="Token"/> from JSON. Accepts a plain string, an object carrying a
    /// <c>"__type"</c> discriminator, or <c>null</c>. Writing is not implemented.
    /// </summary>
    public class TokenConverter : JsonConverter {
        public override bool CanConvert(Type objectType) {
            return objectType == typeof(Token) || objectType == typeof(string);
        }

        /// <summary>
        /// Reads one token from <paramref name="reader"/>.
        /// </summary>
        /// <returns>
        /// <c>null</c> for a JSON null, a <see cref="TokenString"/> for a JSON string,
        /// or an <see cref="AddedToken"/> for an object whose <c>"__type"</c> is <c>"AddedToken"</c>.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The object has no <c>"__type"</c> or an unrecognised one.
        /// </exception>
        public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) {
            if (reader.TokenType == JsonToken.Null) {
                // e.g. an unset token in a config file; JObject.Load would throw on this.
                return null;
            }

            if (reader.TokenType == JsonToken.String) {
                // If the JSON represents the token as a simple string, create a TokenString instance
                string content = reader.Value.ToString();
                return new TokenString(content);
            }

            JObject obj = JObject.Load(reader);

            // Check if '__type' attribute exists
            if (obj.TryGetValue("__type", out var typeToken)) {
                string typeName = typeToken.Value<string>();

                switch (typeName) {
                    case "AddedToken":
                        return obj.ToObject<AddedToken>();
                    default:
                        throw new InvalidOperationException($"Unknown token type: {typeName}");
                }
            }

            throw new InvalidOperationException($"Unknown token type: {obj}");
        }

        public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) {
            throw new NotImplementedException();
        }
    }
}
m_SimulationLayers: 44 | serializedVersion: 2 45 | m_Bits: 4294967295 46 | m_MaxSubStepCount: 4 47 | m_MinSubStepFPS: 30 48 | m_UseSubStepping: 0 49 | m_UseSubStepContacts: 0 50 | m_QueriesHitTriggers: 1 51 | m_QueriesStartInColliders: 1 52 | m_CallbacksOnDisable: 1 53 | m_ReuseCollisionCallbacks: 1 54 | m_AutoSyncTransforms: 0 55 | m_GizmoOptions: 10 56 | m_LayerCollisionMatrix: ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff 57 | -------------------------------------------------------------------------------- /projects/Transformers/.gitignore: -------------------------------------------------------------------------------- 1 | # This .gitignore file should be placed at the root of your Unity project directory 2 | # 3 | # Get latest from https://github.com/github/gitignore/blob/master/Unity.gitignore 4 | # 5 | /[Ll]ibrary/ 6 | /[Tt]emp/ 7 | /[Oo]bj/ 8 | /[Bb]uild/ 9 | /[Bb]uilds/ 10 | /[Ll]ogs/ 11 | /[Uu]ser[Ss]ettings/ 12 | 13 | # MemoryCaptures can get excessive in size. 
14 | # They also could contain extremely sensitive data 15 | /[Mm]emoryCaptures/ 16 | 17 | # Asset meta data should only be ignored when the corresponding asset is also ignored 18 | !/[Aa]ssets/**/*.meta 19 | 20 | # Uncomment this line if you wish to ignore the asset store tools plugin 21 | # /[Aa]ssets/AssetStoreTools* 22 | 23 | # Autogenerated Jetbrains Rider plugin 24 | /[Aa]ssets/Plugins/Editor/JetBrains* 25 | 26 | # Visual Studio cache directory 27 | .vs/ 28 | 29 | # Gradle cache directory 30 | .gradle/ 31 | 32 | # Autogenerated VS/MD/Consulo solution and project files 33 | ExportedObj/ 34 | .consulo/ 35 | *.csproj 36 | *.unityproj 37 | *.sln 38 | *.suo 39 | *.tmp 40 | *.user 41 | *.userprefs 42 | *.pidb 43 | *.booproj 44 | *.svd 45 | *.pdb 46 | *.mdb 47 | *.opendb 48 | *.VC.db 49 | 50 | # Unity3D generated meta files 51 | *.pidb.meta 52 | *.pdb.meta 53 | *.mdb.meta 54 | 55 | # Unity3D generated file on crash reports 56 | sysinfo.txt 57 | 58 | # Builds 59 | *.apk 60 | *.aab 61 | *.unitypackage 62 | 63 | # Crashlytics generated file 64 | crashlytics-build.properties 65 | 66 | # Packed Addressables 67 | /[Aa]ssets/[Aa]ddressable[Aa]ssets[Dd]ata/*/*.bin* 68 | 69 | # Temporary auto-generated Android Assets 70 | /[Aa]ssets/[Ss]treamingAssets/aa.meta 71 | /[Aa]ssets/[Ss]treamingAssets/aa/* 72 | 73 | 74 | ---------------------------------------------------- 75 | 76 | 77 | # ============ # 78 | # OS generated # 79 | # ============ # 80 | .DS_Store 81 | .DS_Store? 
82 | ._* 83 | .Spotlight-V100 84 | .Trashes 85 | ehthumbs.db 86 | Thumbs.db 87 | 88 | # ====== # 89 | # custom # 90 | # ====== # 91 | /[Pp]ackages/[Cc]om.unity.asset-store-tools* 92 | Assets/Resources 93 | Assets/Resources.meta 94 | Assets/StreamingAssets 95 | Assets/StreamingAssets.meta -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/CacheUtils/DynamicCache.cs: --------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using Unity.Sentis;

namespace Doji.AI.Transformers {

    /// <summary>
    /// A cache that grows with every call to <see cref="Update"/>.
    /// Key and value states are held in two parallel lists with one entry per layer index.
    /// </summary>
    public class DynamicCache : Cache {

        /// <summary>Cached key states, one tensor per layer index.</summary>
        public List<Tensor> KeyCache { get; set; }

        /// <summary>Cached value states, one tensor per layer index.</summary>
        public List<Tensor> ValueCache { get; set; }

        // Running sum of keyStates.shape[-2] over all Update calls made for layer 0.
        private int _seenTokens;

        public DynamicCache() : base() {
            KeyCache = new List<Tensor>();
            ValueCache = new List<Tensor>();
        }

        /// <summary>
        /// Returns the cached key/value pair for the layer at <paramref name="index"/>.
        /// </summary>
        /// <exception cref="IndexOutOfRangeException">
        /// <paramref name="index"/> is negative or not smaller than the number of layers
        /// present in both lists.
        /// </exception>
        public override (Tensor Key, Tensor Value) this[int index] {
            get {
                // Both lists are publicly settable, so only the common prefix is addressable.
                int count = Math.Min(KeyCache.Count, ValueCache.Count);
                if (index < 0 || index >= count) {
                    // Report the same bound that is being checked.
                    throw new IndexOutOfRangeException($"Index ({index}) is outside the bounds of the cached values (0-{count - 1}).");
                }
                return (KeyCache[index], ValueCache[index]);
            }
        }

        /// <summary>
        /// Enumerates the cached (key, value) pairs in layer order.
        /// </summary>
        public override IEnumerator<(Tensor Key, Tensor Value)> GetEnumerator() {
            // Ensure both lists have the same number of elements
            int count = Math.Min(KeyCache.Count, ValueCache.Count);

            for (int i = 0; i < count; i++) {
                yield return (KeyCache[i], ValueCache[i]);
            }
        }

        /// <summary>
        /// Returns the size of the second-to-last axis of the key tensor cached for
        /// <paramref name="layerIdx"/>, or 0 when nothing is cached for that layer yet.
        /// </summary>
        /// <param name="layerIdx">Layer to query; <c>null</c> is treated like the default (0).</param>
        public override int GetSeqLength(int? layerIdx = 0) {
            // A null argument used to slip past the lifted comparison below and then
            // throw on layerIdx.Value; fall back to the declared default instead.
            int layer = layerIdx ?? 0;
            if (KeyCache.Count <= layer) {
                return 0;
            }
            return KeyCache[layer].shape[-2];
        }

        /// <summary>
        /// Stores the states of a new layer, or concatenates them to the states already
        /// cached for <paramref name="layerIdx"/> along axis -2.
        /// </summary>
        /// <param name="keyStates">New key states for the layer.</param>
        /// <param name="valueStates">New value states for the layer.</param>
        /// <param name="layerIdx">Index of the layer the states belong to.</param>
        public override void Update(Tensor keyStates, Tensor valueStates, int layerIdx) {
            // Update the number of seen tokens
            if (layerIdx == 0) {
                _seenTokens += keyStates.shape[-2];
            }

            // Update the cache
            if (KeyCache.Count <= layerIdx) {
                // NOTE(review): the entry is appended, so it lands at layerIdx only when
                // layers arrive in ascending order without gaps - confirm callers guarantee that.
                KeyCache.Add(keyStates);
                ValueCache.Add(valueStates);
            } else {
                // NOTE(review): the tensors being replaced are not disposed here - confirm
                // who owns them once Ops.Cat has produced the concatenated result.
                KeyCache[layerIdx] = Ops.Cat(KeyCache[layerIdx], keyStates, axis: -2);
                ValueCache[layerIdx] = Ops.Cat(ValueCache[layerIdx], valueStates, axis: -2);
            }
        }
    }
}
-------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/TrieTest.cs: -------------------------------------------------------------------------------- 1 | using NUnit.Framework; 2 | using System.Collections.Generic; 3 | 4 | namespace Doji.AI.Transformers.Editor.Tests { 5 | 6 | public class TrieTest { 7 | 8 | [Test] 9 | public void TestTrieAddSingle() { 10 | Trie trie = new Trie(); 11 | trie.Add("Hello 友達"); 12 | 13 | string expected = "{\"H\": {\"e\": {\"l\": {\"l\": {\"o\": {\" \": {\"友\": {\"達\": {\"\": 1}}}}}}}}}"; 14 | Assert.AreEqual(expected, trie.ToString(), "Trie does not match expected value after adding 'Hello 友達'."); 15 | } 16 | 17 | [Test] 18 | public void TestTrieAddMultiple() { 19 | Trie trie = new Trie(); 20 | trie.Add("Hello 友達"); 21 | trie.Add("Hello"); 22 | 23 | string expected = "{\"H\": {\"e\": {\"l\": {\"l\": {\"o\": {\" \": {\"友\": {\"達\": {\"\": 1}}}, \"\": 1}}}}}}"; 24 | Assert.AreEqual(expected, trie.ToString(), "Trie does not match expected value after adding 'Hello'."); 25 | } 26 | 27 | [Test] 28 | public void TestTrieIdempotent() { 29 | Trie trie = new Trie(); 30 | trie.Add("Hello World 123 !§$%"); 31 | string first = trie.ToString(); 32 | trie.Add("Hello World 123 !§$%"); 33 | string second = trie.ToString(); 34 | 35 | 
Assert.AreEqual(first, second, "Trie is not idempotent. Adding twice the same word changed the trie."); 36 | } 37 | 38 | [Test] 39 | public void TestTrieSplitSimple() { 40 | Trie trie = new Trie(); 41 | 42 | List result = trie.Split("[CLS] This is a extra_id_100"); 43 | 44 | string[] expected = new string[] { "[CLS] This is a extra_id_100" }; 45 | CollectionAssert.AreEqual(expected, result, "Incorrect Tokenization."); 46 | } 47 | 48 | [Test] 49 | public void TestTrieSplit() { 50 | Trie trie = new Trie(); 51 | trie.Add("[CLS]"); 52 | trie.Add("extra_id_1"); 53 | trie.Add("extra_id_100"); 54 | 55 | List result = trie.Split("[CLS] This is a extra_id_100"); 56 | 57 | string[] expected = new string[] {"[CLS]", " This is a ", "extra_id_100"}; 58 | CollectionAssert.AreEqual(expected, result, "Lists are not equal"); 59 | } 60 | } 61 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/ConfigurationUtils/Configurable.cs: -------------------------------------------------------------------------------- 1 | using Newtonsoft.Json; 2 | using System.IO; 3 | using UnityEngine; 4 | 5 | namespace Doji.AI.Transformers { 6 | 7 | /// 8 | /// All configuration parameters are stored under . 9 | /// Also provides a 10 | /// method for loading classes that inherit from . 11 | /// 12 | public abstract class Configurable where T : PretrainedConfig { 13 | 14 | public const string CONFIG_NAME = "config.json"; 15 | 16 | public T Config { get; } 17 | 18 | public Configurable(T config) { 19 | Config = config; 20 | } 21 | 22 | /// 23 | /// Load a config file from a Resources folder. 
24 | /// 25 | protected static U LoadConfigFromTextAsset(string resourcePath) { 26 | TextAsset textAsset = Resources.Load(resourcePath); 27 | if (textAsset == null) { 28 | //Debug.LogError($"The TextAsset file was not found at: '{path}'"); 29 | return default; 30 | } 31 | 32 | U deserializedObject = JsonConvert.DeserializeObject(textAsset.text); 33 | Resources.UnloadAsset(textAsset); 34 | return deserializedObject; 35 | } 36 | 37 | /// 38 | /// Load a config file from either StreamingAssets or Resources. 39 | /// If no config is found, null is returned. 40 | /// 41 | protected static PretrainedConfig LoadConfig(string file) { 42 | if (File.Exists(file.StreamingAssetsPath())) { 43 | return JsonConvert.DeserializeObject(File.ReadAllText(file.StreamingAssetsPath())); 44 | } 45 | return LoadConfigFromTextAsset(file.ResourcePath()); 46 | } 47 | 48 | /// 49 | /// Load a given type from a file in either StreamingAssets or Resources. 50 | /// If no file is found, null is returned. 51 | /// 52 | protected static U Load(string file) { 53 | if (File.Exists(file.StreamingAssetsPath())) { 54 | return JsonConvert.DeserializeObject(File.ReadAllText(file.StreamingAssetsPath())); 55 | } 56 | return LoadConfigFromTextAsset(file.ResourcePath()); 57 | } 58 | } 59 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/BatchEncoding.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace Doji.AI.Transformers { 4 | 5 | /// 6 | /// Represents the encoded output for a batch of text inputs. 
7 | /// 8 | public class BatchEncoding : Encoding { 9 | 10 | public BatchEncoding() : base() { } 11 | public BatchEncoding(Dictionary dict) : base(dict) { } 12 | 13 | public override IEnumerable InputIds { 14 | get { 15 | if (!TryGetValue("input_ids", out var inputIds)) { 16 | return null; 17 | } 18 | List flattenedList = new List(); 19 | foreach (var innerList in inputIds as List>) { 20 | flattenedList.AddRange(innerList); 21 | } 22 | return flattenedList; 23 | } 24 | } 25 | 26 | public override IEnumerable AttentionMask { 27 | get { 28 | if (!TryGetValue("attention_mask", out var inputIds)) { 29 | return null; 30 | } 31 | List flattenedList = new List(); 32 | foreach (var innerList in inputIds as List>) { 33 | flattenedList.AddRange(innerList); 34 | } 35 | return flattenedList; 36 | } 37 | } 38 | 39 | public override IEnumerable TokenTypeIds { 40 | get { 41 | if (!TryGetValue("token_type_ids", out var inputIds)) { 42 | return null; 43 | } 44 | List flattenedList = new List(); 45 | foreach (var innerList in inputIds as List>) { 46 | flattenedList.AddRange(innerList); 47 | } 48 | return flattenedList; 49 | } 50 | } 51 | 52 | /// 53 | /// Appends all values from to this dictionary 54 | /// which turns this 55 | /// 56 | /// 57 | public void Append(Encoding dict) { 58 | foreach (var kvp in dict) { 59 | if (!ContainsKey(kvp.Key)) { 60 | this[kvp.Key] = new List>(); 61 | } 62 | (this[kvp.Key] as List>).Add(kvp.Value as List); 63 | } 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | doji logo 3 | 4 | 5 | # Transformers 6 | A Unity package to run pretrained transformer models with Unity Sentis 7 | 8 | [OpenUPM] · [Documentation (coming soon)] · [Feedback/Questions] 9 | 10 | ## About 11 | 12 | This is essentially a C# port of Hugging Face’s [transformers] library. 
13 | 14 | There are two use cases for this package right now: 15 | - It's used by the [com.doji.diffusers] package to run Stable Diffusion models in Unity (most SD models use a ClipTokenizer for prompting, newer pipelines require additional ones like T5Tokenizer) 16 | - To run small LLMs like [Phi-3](https://huggingface.co/julienkay/Phi-3-mini-4k-instruct_no_cache_uint8) in Unity (WIP) 17 | 18 | ### Installation 19 | 20 |
21 | via OpenUPM 22 | 23 | 1. In `Edit -> Project Settings -> Package Manager`, add a new scoped registry: 24 | 25 | Name: Doji 26 | URL: https://package.openupm.com 27 | Scope(s): com.doji 28 | 29 | 2. In the Package Manager, install `com.doji.transformers` either by name or by selecting it in the list under `Package Manager -> My Registries`. 30 | 3. For the time being, you also have to use a custom fork of Sentis: in the Package Manager, choose `Install package from git URL` and enter `https://github.com/julienkay/com.unity.sentis.git`. 31 | 32 |
33 | 34 | ## Roadmap: 35 | 36 | Tokenizers 37 | - [x] CLIPTokenizer 38 | - [x] LLamaTokenizer 39 | - [x] GPT2Tokenizer 40 | - [ ] T5Tokenizer 41 | 42 | LLMs 43 | - [ ] Phi-3 44 | 45 | The intention is to provide an API similar to that of Hugging Face's transformers library, so usage in Unity will look something like this: 46 | 47 | ```csharp 48 | var tokenizer = AutoTokenizer.FromPretrained("julienkay/Phi-3-mini-4k-instruct_no_cache_uint8"); 49 | var model = Phi3ForCausalLM.FromPretrained("julienkay/Phi-3-mini-4k-instruct_no_cache_uint8"); 50 | 51 | var inputs = tokenizer.Encode(""); 52 | var outputs = model.Generate(inputs); 53 | var predictedText = tokenizer.Decode(outputs); 54 | ``` 55 | 56 | [OpenUPM]: https://openupm.com/packages/com.doji.transformers 57 | [Documentation (coming soon)]: https://github.com/julienkay/com.doji.transformers 58 | [Feedback/Questions]: https://discussions.unity.com/t/stable-diffusion-diffusers-transformers-package/332701?u=julienkay 59 | [transformers]: https://github.com/huggingface/transformers 60 | [com.doji.diffusers]: https://github.com/julienkay/com.doji.diffusers 61 | -------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/GraphicsSettings.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! 
tag:unity3d.com,2011: 3 | --- !u!30 &1 4 | GraphicsSettings: 5 | m_ObjectHideFlags: 0 6 | serializedVersion: 16 7 | m_Deferred: 8 | m_Mode: 1 9 | m_Shader: {fileID: 69, guid: 0000000000000000f000000000000000, type: 0} 10 | m_DeferredReflections: 11 | m_Mode: 1 12 | m_Shader: {fileID: 74, guid: 0000000000000000f000000000000000, type: 0} 13 | m_ScreenSpaceShadows: 14 | m_Mode: 1 15 | m_Shader: {fileID: 64, guid: 0000000000000000f000000000000000, type: 0} 16 | m_DepthNormals: 17 | m_Mode: 1 18 | m_Shader: {fileID: 62, guid: 0000000000000000f000000000000000, type: 0} 19 | m_MotionVectors: 20 | m_Mode: 1 21 | m_Shader: {fileID: 75, guid: 0000000000000000f000000000000000, type: 0} 22 | m_LightHalo: 23 | m_Mode: 1 24 | m_Shader: {fileID: 105, guid: 0000000000000000f000000000000000, type: 0} 25 | m_LensFlare: 26 | m_Mode: 1 27 | m_Shader: {fileID: 102, guid: 0000000000000000f000000000000000, type: 0} 28 | m_VideoShadersIncludeMode: 2 29 | m_AlwaysIncludedShaders: 30 | - {fileID: 7, guid: 0000000000000000f000000000000000, type: 0} 31 | - {fileID: 15104, guid: 0000000000000000f000000000000000, type: 0} 32 | - {fileID: 15105, guid: 0000000000000000f000000000000000, type: 0} 33 | - {fileID: 15106, guid: 0000000000000000f000000000000000, type: 0} 34 | - {fileID: 10753, guid: 0000000000000000f000000000000000, type: 0} 35 | - {fileID: 10770, guid: 0000000000000000f000000000000000, type: 0} 36 | - {fileID: 10783, guid: 0000000000000000f000000000000000, type: 0} 37 | m_PreloadedShaders: [] 38 | m_PreloadShadersBatchTimeLimit: -1 39 | m_SpritesDefaultMaterial: {fileID: 10754, guid: 0000000000000000f000000000000000, type: 0} 40 | m_CustomRenderPipeline: {fileID: 0} 41 | m_TransparencySortMode: 0 42 | m_TransparencySortAxis: {x: 0, y: 0, z: 1} 43 | m_DefaultRenderingPath: 1 44 | m_DefaultMobileRenderingPath: 1 45 | m_TierSettings: [] 46 | m_LightmapStripping: 0 47 | m_FogStripping: 0 48 | m_InstancingStripping: 0 49 | m_BrgStripping: 0 50 | m_LightmapKeepPlain: 1 51 | 
m_LightmapKeepDirCombined: 1 52 | m_LightmapKeepDynamicPlain: 1 53 | m_LightmapKeepDynamicDirCombined: 1 54 | m_LightmapKeepShadowMask: 1 55 | m_LightmapKeepSubtractive: 1 56 | m_FogKeepLinear: 1 57 | m_FogKeepExp: 1 58 | m_FogKeepExp2: 1 59 | m_AlbedoSwatchInfos: [] 60 | m_RenderPipelineGlobalSettingsMap: {} 61 | m_LightsUseLinearIntensity: 0 62 | m_LightsUseColorTemperature: 0 63 | m_LogWhenShaderIsCompiled: 0 64 | m_LightProbeOutsideHullStrategy: 1 65 | m_CameraRelativeLightCulling: 0 66 | m_CameraRelativeShadowCulling: 0 67 | -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Utils/OrderedDictionary.cs: --------------------------------------------------------------------------------
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Collections;
using System.Linq;

namespace Doji.AI.Transformers {

    /// <summary>
    /// A generic wrapper around <see cref="OrderedDictionary"/> that keeps entries
    /// in insertion order and exposes them through the generic dictionary interfaces.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys.</typeparam>
    /// <typeparam name="TValue">The type of the values.</typeparam>
    internal class OrderedDictionary<TKey, TValue> : ICollection<KeyValuePair<TKey, TValue>>, IEnumerable<KeyValuePair<TKey, TValue>>, IEnumerable, IDictionary<TKey, TValue>, IReadOnlyCollection<KeyValuePair<TKey, TValue>> {
        // The non-generic dictionary stores keys and values as object; all casts happen in this wrapper.
        private readonly OrderedDictionary _dictionary = new OrderedDictionary();

        /// <summary>A snapshot of the keys in insertion order.</summary>
        public ICollection<TKey> Keys => _dictionary.Keys.Cast<TKey>().ToList();

        /// <summary>A snapshot of the values in insertion order.</summary>
        public ICollection<TValue> Values => _dictionary.Values.Cast<TValue>().ToList();

        public int Count => _dictionary.Count;

        public bool IsReadOnly => false;

        /// <summary>
        /// Gets or sets the value stored under <paramref name="key"/>.
        /// Setting a key that is not present appends a new entry.
        /// </summary>
        /// <exception cref="KeyNotFoundException">The getter is used with a key that is not present.</exception>
        public TValue this[TKey key] {
            get {
                object boxed = _dictionary[key];
                // The wrapped dictionary answers null for unknown keys. Passing that on would
                // hide the miss for reference types and fail with a NullReferenceException
                // when unboxing into a value type, so report the miss explicitly.
                if (boxed == null && !_dictionary.Contains(key)) {
                    throw new KeyNotFoundException($"The given key '{key}' was not present in the dictionary.");
                }
                return (TValue)boxed;
            }
            set => _dictionary[key] = value;
        }

        public void Add(TKey key, TValue value) {
            _dictionary.Add(key, value);
        }

        public bool ContainsKey(TKey key) {
            return _dictionary.Contains(key);
        }

        /// <summary>Removes the entry stored under <paramref name="key"/>.</summary>
        /// <returns><c>true</c> if an entry was removed; <c>false</c> if the key was not present.</returns>
        public bool Remove(TKey key) {
            if (!ContainsKey(key)) return false;

            _dictionary.Remove(key);
            return true;
        }

        /// <summary>Looks up <paramref name="key"/> without throwing when it is missing.</summary>
        /// <returns><c>true</c> and the stored value, or <c>false</c> and <c>default</c>.</returns>
        public bool TryGetValue(TKey key, out TValue value) {
            if (ContainsKey(key)) {
                value = this[key];
                return true;
            }

            value = default;
            return false;
        }

        public void Add(KeyValuePair<TKey, TValue> item) {
            _dictionary.Add(item.Key, item.Value);
        }

        public void Clear() {
            _dictionary.Clear();
        }

        /// <summary>Checks that the key is present and that its value equals <c>item.Value</c>.</summary>
        public bool Contains(KeyValuePair<TKey, TValue> item) {
            return _dictionary.Contains(item.Key) && EqualityComparer<TValue>.Default.Equals(this[item.Key], item.Value);
        }

        /// <summary>
        /// Copies all entries, in insertion order, into <paramref name="array"/>
        /// starting at <paramref name="arrayIndex"/>.
        /// </summary>
        /// <exception cref="System.ArgumentNullException"><paramref name="array"/> is null.</exception>
        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="arrayIndex"/> is negative.</exception>
        /// <exception cref="System.ArgumentException">The destination has too little room after <paramref name="arrayIndex"/>.</exception>
        public void CopyTo(KeyValuePair<TKey, TValue>[] array, int arrayIndex) {
            // Validate up front so a destination that is too small is rejected before
            // anything has been written to it.
            if (array == null) {
                throw new System.ArgumentNullException(nameof(array));
            }
            if (arrayIndex < 0) {
                throw new System.ArgumentOutOfRangeException(nameof(arrayIndex));
            }
            if (array.Length - arrayIndex < Count) {
                throw new System.ArgumentException("The destination array is too small to hold all entries.", nameof(array));
            }

            foreach (DictionaryEntry entry in _dictionary) {
                array[arrayIndex++] = new KeyValuePair<TKey, TValue>((TKey)entry.Key, (TValue)entry.Value);
            }
        }

        /// <summary>Removes the entry only if both its key and its value match <paramref name="item"/>.</summary>
        public bool Remove(KeyValuePair<TKey, TValue> item) {
            if (!Contains(item)) return false;

            _dictionary.Remove(item.Key);
            return true;
        }

        /// <summary>Enumerates the entries in insertion order.</summary>
        public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator() {
            foreach (DictionaryEntry entry in _dictionary) {
                yield return new KeyValuePair<TKey, TValue>((TKey)entry.Key, (TValue)entry.Value);
            }
        }

        IEnumerator IEnumerable.GetEnumerator() {
            return GetEnumerator();
        }
    }
}
-------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/EncodingParams.cs: -------------------------------------------------------------------------------- 1 | namespace Doji.AI.Transformers { 2 | 3 | public abstract partial class PreTrainedTokenizerBase { 4 | 5 | protected struct EncodingParams { 6 | public Input Text { get; set; } 7 | public Input TextPair { get; set; } 8 | public string TextTarget { get; set; } 9 | public string TextPairTarget { get; set; } 10 | public bool AddSpecialTokens { get; set; } 11 | public Padding Padding { get; set; } 12 | public Truncation Truncation { get; set; } 13 | public int? MaxLength { get; set; } 14 | public int Stride { get; set; } 15 | public bool IsSplitIntoWords { get; set; } 16 | public int? PadToMultipleOf { get; set; } 17 | public bool? 
ReturnTokenTypeIds { get; set; } 18 | public bool? ReturnAttentionMask { get; set; } 19 | public bool ReturnOverflowingTokens { get; set; } 20 | public bool ReturnSpecialTokensMask { get; set; } 21 | public bool ReturnOffsetsMapping { get; set; } 22 | public bool ReturnLength { get; set; } 23 | 24 | public EncodingParams( 25 | Input text = null, 26 | Input textPair = null, 27 | string textTarget = null, 28 | string textPairTarget = null, 29 | bool addSpecialTokens = true, 30 | Padding padding = Padding.None, 31 | Truncation truncation = Truncation.None, 32 | int? maxLength = null, 33 | int stride = 0, 34 | bool isSplitIntoWords = false, 35 | int? padToMultipleOf = null, 36 | bool? returnTokenTypeIds = null, 37 | bool? returnAttentionMask = null, 38 | bool returnOverflowingTokens = false, 39 | bool returnSpecialTokensMask = false, 40 | bool returnOffsetsMapping = false, 41 | bool returnLength = false) 42 | { 43 | Text = text; 44 | TextPair = textPair; 45 | TextTarget = textTarget; 46 | TextPairTarget = textPairTarget; 47 | AddSpecialTokens = addSpecialTokens; 48 | Padding = padding; 49 | Truncation = truncation; 50 | MaxLength = maxLength; 51 | Stride = stride; 52 | IsSplitIntoWords = isSplitIntoWords; 53 | PadToMultipleOf = padToMultipleOf; 54 | ReturnTokenTypeIds = returnTokenTypeIds; 55 | ReturnAttentionMask = returnAttentionMask; 56 | ReturnOverflowingTokens = returnOverflowingTokens; 57 | ReturnSpecialTokensMask = returnSpecialTokensMask; 58 | ReturnOffsetsMapping = returnOffsetsMapping; 59 | ReturnLength = returnLength; 60 | } 61 | } 62 | } 63 | } -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/InputSerializationTest.cs: -------------------------------------------------------------------------------- 1 | using NUnit.Framework; 2 | using Newtonsoft.Json; 3 | using System.Collections.Generic; 4 | 5 | namespace Doji.AI.Transformers.Editor.Tests { 6 | 7 | public class InputSerializationTest { 8 | 
9 | class InputContainer { 10 | public Input Input { get; set; } 11 | } 12 | 13 | [Test] 14 | public void SingleInput() { 15 | string text = "test"; 16 | Input input = text; 17 | 18 | string s = JsonConvert.SerializeObject(input); 19 | var expectedJson = "{\"Type\":\"SingleInput\",\"Value\":\"test\"}"; 20 | Assert.That(s, Is.EqualTo(expectedJson)); 21 | 22 | Input deserialized = JsonConvert.DeserializeObject(s); 23 | Assert.IsInstanceOf(deserialized); 24 | Assert.AreEqual(text, (deserialized as SingleInput).Text); 25 | } 26 | 27 | [Test] 28 | public void BatchInput() { 29 | var sequence = new List() { "test1", "test2", "test3" }; 30 | Input input = (BatchInput)sequence; 31 | 32 | string s = JsonConvert.SerializeObject(input); 33 | var expectedJson = "{\"Type\":\"BatchInput\",\"Value\":[\"test1\",\"test2\",\"test3\"]}"; 34 | Assert.That(s, Is.EqualTo(expectedJson)); 35 | 36 | Input deserialized = JsonConvert.DeserializeObject(s); 37 | Assert.IsInstanceOf(deserialized); 38 | CollectionAssert.AreEqual(sequence, (deserialized as BatchInput).Sequence); 39 | } 40 | 41 | [Test] 42 | public void PretokenizedSingleInput() { 43 | var pretokenizedText = new List() { "test1", "test2", "test3" }; 44 | Input input = (PretokenizedSingleInput)pretokenizedText; 45 | string s = JsonConvert.SerializeObject(input); 46 | 47 | var expectedJson = "{\"Type\":\"PretokenizedSingleInput\",\"Value\":[\"test1\",\"test2\",\"test3\"]}"; 48 | Assert.That(s, Is.EqualTo(expectedJson)); 49 | 50 | Input deserialized = JsonConvert.DeserializeObject(s) as PretokenizedSingleInput; 51 | Assert.IsInstanceOf(deserialized); 52 | CollectionAssert.AreEqual(pretokenizedText, (deserialized as PretokenizedSingleInput).PretokenizedText); 53 | } 54 | 55 | [Test] 56 | public void PretokenizedBatchInput() { 57 | var sequence = new List>() { 58 | new List() { "batch1test1", "batch1test2", "batch1test3" }, 59 | new List() { "batch2test1", "batch2test2", "batch2test3" }, 60 | new List() { "batch3test1", "batch3test2", 
"batch3test3" }, 61 | }; 62 | Input input = (PretokenizedBatchInput)sequence; 63 | string s = JsonConvert.SerializeObject(input); 64 | 65 | var expectedJson = "{\"Type\":\"PretokenizedBatchInput\",\"Value\":[[\"batch1test1\",\"batch1test2\",\"batch1test3\"],[\"batch2test1\",\"batch2test2\",\"batch2test3\"],[\"batch3test1\",\"batch3test2\",\"batch3test3\"]]}"; 66 | Assert.That(s, Is.EqualTo(expectedJson)); 67 | 68 | Input deserialized = JsonConvert.DeserializeObject(s); 69 | Assert.IsInstanceOf(deserialized); 70 | CollectionAssert.AreEqual(sequence, (deserialized as PretokenizedBatchInput).Sequence); 71 | } 72 | } 73 | } -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/TokenizerConfigTest.cs: -------------------------------------------------------------------------------- 1 | using NUnit.Framework; 2 | 3 | namespace Doji.AI.Transformers.Editor.Tests { 4 | 5 | public class TokenizerConfigTest { 6 | 7 | [Test] 8 | public void TestDeserialize() { 9 | // https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/tokenizer/tokenizer_config.json 10 | string json = @"{""add_prefix_space"":false,""bos_token"":{""__type"":""AddedToken"",""content"":""<|startoftext|>"",""lstrip"":false,""normalized"":true,""rstrip"":false,""single_word"":false},""do_lower_case"":true,""eos_token"":{""__type"":""AddedToken"",""content"":""<|endoftext|>"",""lstrip"":false,""normalized"":true,""rstrip"":false,""single_word"":false},""errors"":""replace"",""model_max_length"":77,""name_or_path"":""openai/clip-vit-large-patch14"",""pad_token"":""<|endoftext|>"",""special_tokens_map_file"":""./special_tokens_map.json"",""tokenizer_class"":""CLIPTokenizer"",""unk_token"":{""__type"":""AddedToken"",""content"":""<|endoftext|>"",""lstrip"":false,""normalized"":true,""rstrip"":false,""single_word"":false}}"; 11 | TokenizerConfig config = TokenizerConfig.Deserialize(json); 12 | 13 | Assert.That(config, Is.Not.Null); 14 | 
Assert.That(config.BosToken.Content, Is.EqualTo("<|startoftext|>")); 15 | Assert.That(config.BosToken, Is.TypeOf(typeof(AddedToken))); 16 | AddedToken bosToken = config.BosToken as AddedToken; 17 | Assert.That(bosToken.Lstrip == false); 18 | Assert.That(bosToken.Normalized == true); 19 | Assert.That(bosToken.Rstrip == false); 20 | Assert.That(bosToken.SingleWord == false); 21 | Assert.That(config.DoLowerCase == true); 22 | Assert.That(config.PadToken, Is.TypeOf(typeof(TokenString))); 23 | Assert.That(config.ModelMaxLength, Is.EqualTo(77)); 24 | Assert.That(config.SepToken, Is.Null); 25 | Assert.That(config.ClsToken, Is.Null); 26 | Assert.That(config.MaskToken, Is.Null); 27 | } 28 | 29 | [Test] 30 | public void TestDeserializeAddedTokens() { 31 | // https://huggingface.co/stabilityai/sdxl-turbo/blob/main/tokenizer/tokenizer_config.json 32 | string json = @"{""add_prefix_space"":false,""added_tokens_decoder"":{""49406"":{""content"":""<|startoftext|>"",""lstrip"":false,""normalized"":true,""rstrip"":false,""single_word"":false,""special"":true},""49407"":{""content"":""<|endoftext|>"",""lstrip"":false,""normalized"":true,""rstrip"":false,""single_word"":false,""special"":true}},""bos_token"":""<|startoftext|>"",""clean_up_tokenization_spaces"":true,""do_lower_case"":true,""eos_token"":""<|endoftext|>"",""errors"":""replace"",""model_max_length"":77,""pad_token"":""<|endoftext|>"",""tokenizer_class"":""CLIPTokenizer"",""unk_token"":""<|endoftext|>""}"; 33 | TokenizerConfig config = TokenizerConfig.Deserialize(json); 34 | Assert.That(config, Is.Not.Null); 35 | Assert.That(config.BosToken.Content, Is.EqualTo("<|startoftext|>")); 36 | Assert.That(config.BosToken, Is.TypeOf(typeof(TokenString))); 37 | TokenString bosToken = config.BosToken as TokenString; 38 | Assert.That(config.DoLowerCase == true); 39 | Assert.That(config.PadToken, Is.TypeOf(typeof(TokenString))); 40 | Assert.That(config.ModelMaxLength, Is.EqualTo(77)); 41 | Assert.That(config.SepToken, Is.Null); 42 | 
Assert.That(config.ClsToken, Is.Null); 43 | Assert.That(config.MaskToken, Is.Null); 44 | Assert.That(config.AddedTokensDecoder, Is.Not.Null); 45 | } 46 | } 47 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/TokenizerConfig.cs: -------------------------------------------------------------------------------- 1 | using Newtonsoft.Json; 2 | using System; 3 | using System.Collections.Generic; 4 | 5 | namespace Doji.AI.Transformers { 6 | 7 | public class TokenizerConfig { 8 | 9 | [JsonProperty("add_prefix_space")] 10 | public bool? AddPrefixSpace = null; 11 | 12 | [JsonProperty("add_bos_token")] 13 | public bool? AddBosToken = null; 14 | 15 | [JsonProperty("add_eos_token")] 16 | public bool? AddEosToken = null; 17 | 18 | [JsonProperty("added_tokens_decoder")] 19 | [JsonConverter(typeof(AddedTokensConverter))] 20 | public Dictionary AddedTokensDecoder; 21 | 22 | [JsonProperty("bos_token")] 23 | [JsonConverter(typeof(TokenConverter))] 24 | public Token BosToken { get; set; } = null; 25 | 26 | [JsonProperty("eos_token")] 27 | [JsonConverter(typeof(TokenConverter))] 28 | public Token EosToken { get; set; } = null; 29 | 30 | [JsonProperty("unk_token")] 31 | [JsonConverter(typeof(TokenConverter))] 32 | public Token UnkToken { get; set; } = null; 33 | 34 | [JsonProperty("sep_token")] 35 | [JsonConverter(typeof(TokenConverter))] 36 | public Token SepToken { get; set; } = null; 37 | 38 | [JsonProperty("pad_token")] 39 | [JsonConverter(typeof(TokenConverter))] 40 | public Token PadToken { get; set; } = null; 41 | 42 | [JsonProperty("cls_token")] 43 | [JsonConverter(typeof(TokenConverter))] 44 | public Token ClsToken { get; set; } = null; 45 | 46 | [JsonProperty("mask_token")] 47 | [JsonConverter(typeof(TokenConverter))] 48 | public Token MaskToken { get; set; } = null; 49 | 50 | [JsonProperty("do_lower_case")] 51 | public bool DoLowerCase { get; set; } 52 | 53 | 
[JsonProperty("errors")] 54 | public string Errors { get; set; } = "replace"; 55 | 56 | [JsonProperty("legacy")] 57 | public bool? Legacy { get; set; } 58 | 59 | [JsonProperty("model_input_names")] 60 | public List ModelInputNames { get; set; } 61 | 62 | [JsonProperty("clean_up_tokenization_spaces")] 63 | public bool? CleanUpTokenizationSpaces { get; set; } 64 | 65 | [JsonProperty("split_special_tokens")] 66 | public bool? SplitSpecialTokens { get; set; } 67 | 68 | [JsonProperty("model_max_length")] 69 | public int? ModelMaxLength { get; set; } 70 | 71 | [JsonProperty("padding_side")] 72 | public Side? PaddingSide { get; set; } 73 | 74 | [JsonProperty("truncation_side")] 75 | public Side? TruncationSide { get; set; } 76 | 77 | [JsonProperty("tokenizer_class")] 78 | public string TokenizerClass { get; set; } 79 | 80 | public static TokenizerConfig Deserialize(string json) { 81 | TokenizerConfig config = JsonConvert.DeserializeObject(json); 82 | return config; 83 | } 84 | 85 | private class AddedTokensConverter : JsonConverter { 86 | public override bool CanConvert(Type objectType) { 87 | return true; 88 | } 89 | 90 | public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) { 91 | var addedTokens = serializer.Deserialize>(reader); 92 | return addedTokens; 93 | } 94 | 95 | public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) { 96 | throw new NotImplementedException(); 97 | } 98 | } 99 | } 100 | } -------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/Models/Auto/AutoModelForCausalLM.cs: --------------------------------------------------------------------------------
using Newtonsoft.Json;
using System.IO;
using System;
using UnityEngine;
using Unity.Sentis;

namespace Doji.AI.Transformers {

    /// <summary>
    /// Picks the concrete causal language model class for a pretrained model
    /// based on the first architecture listed in its config file.
    /// </summary>
    public static class AutoModelForCausalLM {

        /// <summary>File name of the model config inside a model folder.</summary>
        public static string CONFIG_FILE = "config.json";

#if UNITY_EDITOR
        /// <summary>Editor-only event raised with the model name at the start of <see cref="FromPretrained"/>.</summary>
        public static event Action<string> OnModelRequested = (x) => { };
#endif

        /// <summary>
        /// Loads the <see cref="PretrainedConfig"/> of the given model.
        /// </summary>
        internal static PretrainedConfig LoadPretrainedConfig(string pretrainedModelNameOrPath) {
            return LoadFromJson<PretrainedConfig>(pretrainedModelNameOrPath);
        }

        /// <summary>
        /// Loads an object of type <typeparamref name="T"/> from the model's config file,
        /// looking in StreamingAssets first and falling back to Resources.
        /// </summary>
        internal static T LoadFromJson<T>(string pretrainedModelNameOrPath) {
            string streamingAssetsPath = Path.Combine(Application.streamingAssetsPath, pretrainedModelNameOrPath, CONFIG_FILE);
            if (File.Exists(streamingAssetsPath)) {
                return LoadJsonFromFile<T>(streamingAssetsPath);
            }

            // Resources paths are given without extension and must use forward slashes;
            // Path.Combine inserts a backslash on Windows, so normalize the separator.
            string resourcePath = Path.Combine(pretrainedModelNameOrPath, Path.ChangeExtension(CONFIG_FILE, null)).Replace('\\', '/');
            return LoadJsonFromTextAsset<T>(resourcePath);
        }

        /// <summary>
        /// Loads an object of type <typeparamref name="T"/> by deserializing the json file at <paramref name="path"/>.
        /// </summary>
        /// <exception cref="NotImplementedException">The code was compiled without UNITY_STANDALONE.</exception>
        /// <exception cref="FileNotFoundException">No file exists at <paramref name="path"/>.</exception>
        private static T LoadJsonFromFile<T>(string path) {
            // #else keeps the behaviour of the original early throw while avoiding
            // unreachable code when UNITY_STANDALONE is not defined.
            // NOTE(review): this also throws in the Editor when the active build target is not
            // standalone, even though the caller only gets here after File.Exists succeeded - confirm intended.
#if !UNITY_STANDALONE
            throw new NotImplementedException();
#else
            if (!File.Exists(path)) {
                throw new FileNotFoundException($"The .json file was not found at: '{path}'");
            }
            string json = File.ReadAllText(path);
            T deserializedObject = JsonConvert.DeserializeObject<T>(json);
            return deserializedObject;
#endif
        }

        /// <summary>
        /// Loads an object of type <typeparamref name="T"/> by deserializing a text asset in Resources.
        /// </summary>
        /// <param name="path">The path to the text file in the Resources folder</param>
        /// <exception cref="FileNotFoundException">No text asset exists at <paramref name="path"/>.</exception>
        private static T LoadJsonFromTextAsset<T>(string path) {
            TextAsset textAsset = Resources.Load<TextAsset>(path);
            if (textAsset == null) {
                throw new FileNotFoundException($"The TextAsset file was not found at: '{path}'");
            }
            T deserializedObject = JsonConvert.DeserializeObject<T>(textAsset.text);
            // The text has been consumed; release the asset again.
            Resources.UnloadAsset(textAsset);
            return deserializedObject;
        }

        /// <summary>
        /// Loads the given pretrained model (with a causal language modeling head).
        /// </summary>
        /// <param name="pretrainedModelNameOrPath">Model folder, relative to StreamingAssets or Resources.</param>
        /// <param name="backend">Backend handed on to the concrete model class.</param>
        /// <exception cref="InvalidOperationException">The config is empty or lists no architecture.</exception>
        /// <exception cref="NotImplementedException">The listed architecture has no implementation here.</exception>
        public static PreTrainedModel FromPretrained(string pretrainedModelNameOrPath, BackendType backend = BackendType.GPUCompute) {
#if UNITY_EDITOR
            OnModelRequested?.Invoke(pretrainedModelNameOrPath);
#endif
            // use the model's config file to find out which architecture to instantiate.
            PretrainedConfig config = LoadPretrainedConfig(pretrainedModelNameOrPath);
            // An empty config file deserializes to null; report it like a missing architecture.
            if (config?.Architectures == null || config.Architectures.Count == 0) {
                throw new InvalidOperationException($"No architecture found in the config for '{pretrainedModelNameOrPath}'.");
            }
            string arch = config.Architectures[0];

            return arch switch {
                "Phi3ForCausalLM" => Phi3ForCausalLM.FromPretrained(pretrainedModelNameOrPath, backend),
                _ => throw new NotImplementedException($"'{arch}' architecture not yet implemented."),
            };
        }
    }
}
-------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/InputConverter.cs: -------------------------------------------------------------------------------- 1 | using Newtonsoft.Json; 2 | using Newtonsoft.Json.Linq; 3 | using System; 4 | using System.Collections.Generic; 5 | 6 | namespace Doji.AI.Transformers { 7 | 8 | public class InputConverter : JsonConverter { 9 | public override bool CanConvert(Type objectType) { 10 | return typeof(Input).IsAssignableFrom(objectType); 11 | } 12 | 13 | public 
override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) {
            // Load as a generic token: JObject.Load rejects anything that is not an object,
            // which made the null case and the legacy branch below fail inside Load.
            JToken token = JToken.Load(reader);

            if (token.Type == JTokenType.Null) {
                return null;
            }
            if (token.Type == JTokenType.String) {
                // Presumably the legacy format stored a single input as a bare string - TODO confirm.
                return new SingleInput(token.Value<string>());
            }
            if (!(token is JObject jsonObject)) {
                throw new JsonSerializationException($"Unable to deserialize {nameof(Input)} from a token of type {token.Type}.");
            }
            if (jsonObject["Type"] == null) {
                throw new JsonSerializationException($"Unable to deserialize {nameof(Input)}: the 'Type' property is missing.");
            }
            string type = jsonObject["Type"].Value<string>();

            // "Type" holds the class name written by WriteJson, "Value" the payload of that class.
            switch (type) {
                case nameof(SingleInput):
                    return new SingleInput(jsonObject["Value"].Value<string>());
                case nameof(BatchInput):
                    var batch = jsonObject["Value"];
                    return new BatchInput(batch.ToObject<List<string>>(serializer));
                case nameof(PretokenizedSingleInput):
                    var pretokenized = jsonObject["Value"];
                    return new PretokenizedSingleInput(pretokenized.ToObject<List<string>>(serializer));
                case nameof(PretokenizedBatchInput):
                    var pretokenizedBatch = jsonObject["Value"];
                    return new PretokenizedBatchInput(pretokenizedBatch.ToObject<List<List<string>>>(serializer));
                default:
                    throw new InvalidOperationException("Unknown Input type");
            }
        }

        /// <summary>
        /// Writes an input as <c>{"Type": class name, "Value": payload}</c>.
        /// A value of any other type is written with its "Type" only.
        /// </summary>
        public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) {
            JObject obj = new JObject();
            obj["Type"] = value.GetType().Name;
            if (value is SingleInput singleInput) {
                obj["Value"] = JToken.FromObject(singleInput.Text, serializer);
            } else if (value is BatchInput batchInput) {
                obj["Value"] = JToken.FromObject(batchInput.Sequence, serializer);
            } else if (value is PretokenizedSingleInput pretokenizedSingleInput) {
                obj["Value"] = JToken.FromObject(pretokenizedSingleInput.PretokenizedText, serializer);
            } else if (value is PretokenizedBatchInput pretokenizedBatchInput) {
                obj["Value"] = JToken.FromObject(pretokenizedBatchInput.Sequence, serializer);
            }
            obj.WriteTo(writer);
        }
    }
}
-------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/ConfigurationUtils/PretrainedModel.cs: 
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.IO;
using Unity.Sentis;
using UnityEngine;

namespace Doji.AI.Transformers {
    /// <summary>
    /// Abstract base for pretrained models: holds the capability flags and the
    /// backend selection shared by all model classes.
    /// </summary>
    public abstract partial class PreTrainedModel : Configurable<PretrainedConfig>, IDisposable {

        /// <summary>
        /// Base name of the model file.
        /// </summary>
        public const string MODEL_NAME = "model";

        /// <summary>
        /// Name of the main model input; "input_ids" unless overridden.
        /// </summary>
        public virtual string MainInputName { get; } = "input_ids";

        /// <summary>
        /// The generation config handed to the constructor (may be null).
        /// </summary>
        public GenerationConfig GenerationConfig { get; }

        // All capability flags below are false unless stated otherwise.
        protected bool IsStateful { get; }

        // Flash Attention 2 support
        protected bool SupportsFlashAttn2 { get; }

        // SDPA support
        protected bool SupportsSdpa { get; }

        // Has support for a `Cache` instance as `past_key_values`? Does it support a `StaticCache`?
        protected bool SupportsCacheClass { get; }
        protected bool SupportsStaticCache { get; }

        // Has support for a `QuantoQuantizedCache` instance as `past_key_values`
        protected bool SupportsQuantizedCache { get; }


        /* In original code these are retrieved by inspecting attributes/arguments
           t.b.d. if there's a good way to implement this generically without resorting to Reflection */
        protected abstract bool AcceptsAttentionMask { get; }
        protected abstract bool HasEncoder { get; }

        // No encoder by default (null).
        protected virtual object Encoder { get; }

        /// <summary>
        /// Which <see cref="BackendType"/> to run the model with.
        /// </summary>
        private BackendType Backend { get; set; } = BackendType.GPUCompute;

        /// <summary>
        /// The runtime model.
/// </summary>
        private Model _model;
        protected Worker _worker;
        protected Ops _ops;

        /// <summary>
        /// Stores the backend, creates the worker and ops for <paramref name="model"/>
        /// and keeps the generation config.
        /// </summary>
        /// <param name="model">The runtime model; must not be null.</param>
        /// <param name="config">The model config, passed on to the base class.</param>
        /// <param name="generationConfig">Optional generation config.</param>
        /// <param name="backend">The backend to run the model with.</param>
        public PreTrainedModel(Model model, PretrainedConfig config, GenerationConfig generationConfig = null, BackendType backend = BackendType.GPUCompute) : base(config) {
            // Backend has to be set first because InitializeNetwork reads it.
            Backend = backend;
            InitializeNetwork(model);
            GenerationConfig = generationConfig;
        }

        /// <summary>
        /// Creates the worker and ops objects for the given model.
        /// Objects from an earlier initialization are disposed first.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="model"/> is null.</exception>
        protected virtual void InitializeNetwork(Model model) {
            if (model == null) {
                // ArgumentNullException derives from ArgumentException, so existing handlers keep working.
                throw new ArgumentNullException(nameof(model), "Model was null");
            }

            // Release anything created by a previous call, otherwise it would leak.
            _worker?.Dispose();
            _ops?.Dispose();

            _model = model;
            _worker = new Worker(_model, Backend);
            _ops = new Ops(Backend);
        }

        /// <summary>
        /// Disposes the worker and ops objects. Safe to call more than once.
        /// </summary>
        public virtual void Dispose() {
            _worker?.Dispose();
            _worker = null;
            _ops?.Dispose();
            _ops = null;
        }

        /// <summary>
        /// Runs the model on the given named inputs.
        /// </summary>
        public abstract ModelOutput Execute(Dictionary<string, Tensor> modelInputs);

        /// <summary>
        /// Loads a Sentis <see cref="Model"/> from a <see cref="ModelAsset"/> in Resources.
        /// Returns null when no asset exists at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">The path to the model file in the Resources folder</param>
        private static Model LoadFromModelAsset(string path) {
            ModelAsset modelAsset = Resources.Load<ModelAsset>(path);
            if (modelAsset == null) {
                return null;
            }
            Model model = ModelLoader.Load(modelAsset);
            Resources.UnloadAsset(modelAsset);
            return model;
        }

        /// <summary>
        /// Load a pretrained model either from StreamingAssets (in .sentis format)
        /// or from a Resources folder (in .onnx format).
        /// If no model is found null is returned.
/// </summary>
        protected static Model LoadModel(string model) {
            // Resolve the path once instead of once for the check and once for the load.
            string streamingAssetsPath = model.StreamingAssetsPathForModel(MODEL_NAME);
            if (File.Exists(streamingAssetsPath)) {
                return ModelLoader.Load(streamingAssetsPath);
            }
            return LoadFromModelAsset(model.ResourcePathForModel(MODEL_NAME));
        }

        /// <summary>
        /// Creates an instance of <typeparamref name="C"/> through its
        /// (model, config, generationConfig, backend) constructor.
        /// Any failure is logged and then rethrown.
        /// </summary>
        private static C FromConfig<C>(PretrainedConfig config, Model model, GenerationConfig generationConfig, BackendType backend) where C : PreTrainedModel {
            try {
                return (C)Activator.CreateInstance(typeof(C), model, config, generationConfig, backend);
            } catch (Exception e) {
                Log.Error($"{e.GetType().Name} when trying to create class of type '{typeof(C).Name}'");
                // Plain `throw;` keeps the original stack trace; `throw e;` would reset it.
                throw;
            }
        }

        /// <summary>
        /// Loads config, model and generation config for the given pretrained model
        /// and creates a <typeparamref name="C"/> from them.
        /// </summary>
        /// <exception cref="FileNotFoundException">The config or the model could not be loaded.</exception>
        protected static C FromPretrained<C>(string pretrainedModelNameOrPath, BackendType backend) where C : PreTrainedModel {
            string configFile = Path.Combine(pretrainedModelNameOrPath, CONFIG_NAME);
            string generationConfigFile = Path.Combine(pretrainedModelNameOrPath, "generation_config.json");
            var config = LoadConfig(configFile) ?? throw new FileNotFoundException($"File '{configFile}' not found for: '{typeof(C).Name}'");
            var model = LoadModel(pretrainedModelNameOrPath) ?? throw new FileNotFoundException($"Model file for '{pretrainedModelNameOrPath}' not found for: '{typeof(C).Name}'");
            var generationConfig = Load<GenerationConfig>(generationConfigFile);
            return FromConfig<C>(config, model, generationConfig, backend);
        }
    }
}
--------------------------------------------------------------------------------
/com.doji.transformers/Tests/Editor/LLamaTokenizerTest.cs:
--------------------------------------------------------------------------------
using NUnit.Framework;
using System.Collections;
using System.Collections.Generic;
using System.Linq;

namespace Doji.AI.Transformers.Editor.Tests {

    /// <summary>
    /// LLamaTokenizerTest test with a reduced vocabulary.
/// </summary>
    public class LLamaTokenizerTest {

        /// <summary>
        /// Input text paired with the token strings expected from Tokenize.
        /// </summary>
        public static IEnumerable TokenizeTestData {
            get {
                var expectedTokens = new List<string>() { "▁The", "▁quick", "▁brown", "▁fo", "x", "▁j", "umps", "▁over", "▁the", "▁lazy", "▁dog", "." };
                yield return new TestCaseData("The quick brown fox jumps over the lazy dog.").Returns(expectedTokens);
            }
        }

        /// <summary>
        /// Input text paired with the token ids expected from Encode.
        /// </summary>
        public static IEnumerable EncodeTestData {
            get {
                var expectedIds = new List<int>() { 450, 4996, 17354, 1701, 29916, 432, 17204, 975, 278, 17366, 11203, 29889 };
                yield return new TestCaseData("The quick brown fox jumps over the lazy dog.").Returns(expectedIds);
            }
        }

        // Prompts that are encoded and decoded again in TestRoundtrip.
        private static List<string> RoundtripInput = new List<string>() { "The quick brown fox jumps over the lazy dog." };

        [Test]
        [TestCaseSource(nameof(TokenizeTestData))]
        public List<string> TestTokenize(string text) {
            return CreateTokenizer().Tokenize(text);
        }

        [Test]
        public void TestEncodeType() {
            var encoding = CreateTokenizer().Encode("lower newer");

            Assert.IsTrue(encoding.ContainsKey("input_ids"), "Encoded ids not found in 'input_ids'.");
            object ids = encoding["input_ids"];
            Assert.IsTrue(ids is ICollection, "Unexpected type for encoded text.");
            Assert.IsTrue(encoding is InputEncoding, "Unexpected type for encoding.");
        }

        [Test]
        [TestCaseSource(nameof(EncodeTestData))]
        public IEnumerable<int> TestEncode(string text) {
            InputEncoding encoding = CreateTokenizer().Encode(text) as InputEncoding;
            return encoding.InputIds;
        }

        [Test]
        public void TestRoundtrip([ValueSource(nameof(RoundtripInput))] string prompt) {
            LlamaTokenizer tokenizer = CreateTokenizer();
            var ids = tokenizer.Encode(prompt).InputIds.ToList();
            var decoded = tokenizer.Decode(ids, skipSpecialTokens: true);
            Assert.That(decoded, Is.EqualTo(prompt));
        }

        /// <summary>
        /// Creates a LlamaTokenizer.
        /// </summary>
        private LlamaTokenizer CreateTokenizer() {
            // create LlamaTokenizer with microsoft/Phi-3-mini-4k-instruct settings
            // NOTE(review): the empty "content" values of tokens 0-2 and the empty bos/unk tokens
            // look like angle-bracket tokens lost during extraction - confirm against the real file.
            string tokenizerConfig = "{\"add_bos_token\":false,\"add_eos_token\":false,\"added_tokens_decoder\":{\"0\":{\"content\":\"\",\"lstrip\":false,\"normalized\":false,\"rstrip\":false,\"single_word\":false,\"special\":true},\"1\":{\"content\":\"\",\"lstrip\":false,\"normalized\":false,\"rstrip\":false,\"single_word\":false,\"special\":true},\"2\":{\"content\":\"\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":false},\"32000\":{\"content\":\"<|endoftext|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":false,\"single_word\":false,\"special\":true},\"32001\":{\"content\":\"<|assistant|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":true},\"32002\":{\"content\":\"<|placeholder1|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":true},\"32003\":{\"content\":\"<|placeholder2|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":true},\"32004\":{\"content\":\"<|placeholder3|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":true},\"32005\":{\"content\":\"<|placeholder4|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":true},\"32006\":{\"content\":\"<|system|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":true},\"32007\":{\"content\":\"<|end|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":true},\"32008\":{\"content\":\"<|placeholder5|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":true},\"32009\":{\"content\":\"<|placeholder6|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":true},\"32010\":{\"content\":\"<|user|>\",\"lstrip\":false,\"normalized\":false,\"rstrip\":true,\"single_word\":false,\"special\":true}},\"bos_token\":\"\",\"chat_template\":\"{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\\n' + message['content'] + '<|end|>\\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\\n' + message['content'] + '<|end|>\\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\\n' + message['content'] + '<|end|>\\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\\n' }}{% else %}{{ eos_token }}{% endif %}\",\"clean_up_tokenization_spaces\":false,\"eos_token\":\"<|endoftext|>\",\"legacy\":false,\"model_max_length\":4096,\"pad_token\":\"<|endoftext|>\",\"padding_side\":\"left\",\"sp_model_kwargs\":{},\"tokenizer_class\":\"LlamaTokenizer\",\"unk_token\":\"\",\"use_default_system_prompt\":false}";
            string modelPath = "Packages/com.doji.transformers/Tests/Editor/Resources/Phi-3-mini-4k-instruct/tokenizer.model";
            return new LlamaTokenizer(modelPath, TokenizerConfig.Deserialize(tokenizerConfig));
        }
    }
}
--------------------------------------------------------------------------------
/com.doji.transformers/Runtime/Scripts/Models/Phi3/Phi3ForCausalLM.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using Unity.Sentis;

namespace Doji.AI.Transformers {

    public class Phi3ForCausalLM : PreTrainedModel {

        protected override bool AcceptsAttentionMask => true;
        protected override bool HasEncoder => false;

        public Phi3ForCausalLM(Model model, PretrainedConfig config, GenerationConfig generationConfig = null, BackendType backend = BackendType.GPUCompute) : base(model, config, generationConfig, backend) { }

        /// <summary>
        /// Instantiate a Phi3 model from a JSON configuration file.
/// </summary>
        public static Phi3ForCausalLM FromPretrained(string model, BackendType backend = BackendType.GPUCompute) {
            return FromPretrained<Phi3ForCausalLM>(model, backend);
        }

        /// <summary>
        /// Schedules the worker with the values of <paramref name="modelInputs"/>
        /// and wraps the "logits" output.
        /// </summary>
        public override ModelOutput Execute(Dictionary<string, Tensor> modelInputs) {
            _worker.Schedule(modelInputs.Values.ToArray());
            var logits = _worker.PeekOutput("logits") as Tensor<float>;
            return new CausalLMOutputWithPast(logits);
        }

        /// <summary>
        /// Builds the named model inputs for one generation step from the input ids
        /// and the optional entries in <paramref name="kwargs"/> ("past_key_values",
        /// "attention_mask", "inputs_embeds", "cache_position", "position_ids", "use_cache").
        /// </summary>
        /// <exception cref="NotImplementedException">A StaticCache is combined with a rank-2 attention mask.</exception>
        /// <exception cref="InvalidOperationException">"use_cache" is enabled but no cache was passed.</exception>
        protected override Dictionary<string, Tensor> PrepareInputsForGeneration(
            Tensor<int> inputIds,
            Kwargs kwargs)
        {
            // NOTE(review): presumably the layer count, head count and head size of Phi-3-mini -
            // TODO read these from the model config instead of hard-coding them.
            const int numLayers = 32;
            const int numHeads = 32;
            const int headDim = 96;

            // NOTE(review): tensor element types were reconstructed from usage - confirm.
            Cache pastKeyValues = kwargs.Get<Cache>("past_key_values");
            Tensor<int> attentionMask = kwargs.Get<Tensor<int>>("attention_mask");
            Tensor<float> inputsEmbeds = kwargs.Get<Tensor<float>>("inputs_embeds");
            Tensor<int> cachePosition = kwargs.Get<Tensor<int>>("cache_position");
            Tensor<int> positionIds = kwargs.Get<Tensor<int>>("position_ids");

            // If we have cache: let's slice `inputIds` through `cachePosition`, to keep only the unprocessed tokens
            if (pastKeyValues != null) {
                if (inputsEmbeds != null) {
                    // Exception 1: when passing input_embeds, inputIds may be missing entries
                    inputIds = _ops.Slice(inputIds, .., ^cachePosition.shape[0]..);
                } else if (inputIds.shape[1] != cachePosition.shape[0]) {
                    // Select the columns named by cachePosition for every batch row.
                    // The indices address the sequence dimension, so the gather runs along
                    // axis 1; axis 0 would index the batch dimension with sequence positions.
                    var indices = _ops.Expand(cachePosition, new TensorShape(inputIds.shape[0], cachePosition.shape[0]));
                    inputIds = _ops.GatherElements(inputIds, indices, 1);
                }
                // Exception 2: some generation methods do special slicing of inputIds, so we don't need to do it here
            }

            if (attentionMask != null && positionIds == null) {
                // create positionIds on the fly for batch generation
                positionIds = _ops.Sub(_ops.CumSum(attentionMask, -1), 1);
                // NOTE(review): the reference implementation fills where attention_mask == 0;
                // Neg(attentionMask) may not express that condition - confirm against Ops.MaskedFill.
                positionIds = _ops.MaskedFill(positionIds, _ops.Neg(attentionMask), 1);
                if (pastKeyValues != null) {
                    positionIds = _ops.Slice(positionIds, .., ^inputIds.shape[1]..);
                }
            }

            Dictionary<string, Tensor> modelInputs;
            //if `inputsEmbeds` are passed, we only want to use them in the 1st generation step
            if (inputsEmbeds != null && cachePosition[0] == 0) {
                modelInputs = new() { { "inputs_embeds", inputsEmbeds }, { "input_ids", null } };
            } else {
                modelInputs = new() { { "input_ids", inputIds }, { "inputs_embeds", null } };
            }

            // A missing attention mask must not crash the check, hence the null-conditional.
            if (pastKeyValues is StaticCache && attentionMask?.shape.rank == 2) {
                // The 4D causal mask preparation of the reference implementation is not ported yet.
                throw new NotImplementedException("_prepare_4d_causal_attention_mask_with_cachePosition");
            }

            modelInputs["position_ids"] = positionIds;
            modelInputs["cache_position"] = cachePosition;
            modelInputs["attention_mask"] = attentionMask;

            // prepare past_key_values
            if (!kwargs.Get("use_cache", true)) {
                return modelInputs;
            }
            if (pastKeyValues == null) {
                // Fail with a clear message instead of a NullReferenceException in the loop below.
                throw new InvalidOperationException("'use_cache' is enabled but no 'past_key_values' cache was provided.");
            }
            for (int i = 0; i < numLayers; i++) {
                string key = $"past_key_values.{i}.key";
                string value = $"past_key_values.{i}.value";
                if (pastKeyValues.GetSeqLength(i) == 0) {
                    // create empty tensors for initial loop
                    modelInputs[key] = _ops.AllocNoData(new TensorShape(inputIds.shape[0], numHeads, 0, headDim)) as Tensor;
                    modelInputs[value] = _ops.AllocNoData(new TensorShape(inputIds.shape[0], numHeads, 0, headDim));
                    pastKeyValues.Update(modelInputs[key], modelInputs[value], i);
                } else {
                    modelInputs[key] = pastKeyValues[i].Key;
                    modelInputs[value] = pastKeyValues[i].Value;
                }
            }
            return modelInputs;
        }
}
}
--------------------------------------------------------------------------------
/com.doji.transformers/Runtime/Scripts/Models/Auto/AutoTokenizer.cs:
--------------------------------------------------------------------------------
using Newtonsoft.Json;
using System;
using System.IO;
using UnityEngine;
using static Doji.AI.Transformers.PreTrainedTokenizerBase;

namespace Doji.AI.Transformers {

    public static class AutoTokenizer {

#if UNITY_EDITOR
        /// <summary>
        /// Raised (editor only) with the model name whenever a tokenizer is requested.
        /// </summary>
        public static event Action<string> OnModelRequested = (x) => { };
#endif

        /// <summary>
        /// Loads the tokenizer config of the given pretrained model.
        /// </summary>
        internal static TokenizerConfig LoadTokenizerConfig(string pretrainedModelNameOrPath) {
            return LoadFromJson<TokenizerConfig>(pretrainedModelNameOrPath);
        }

        /// <summary>
        /// Loads an object of type <typeparamref name="T"/> from a json file
        /// either in StreamingAssets or Resources.
        /// </summary>
        internal static T LoadFromJson<T>(string pretrainedModelNameOrPath) {
            string streamingAssetsPath = Path.Combine(Application.streamingAssetsPath, pretrainedModelNameOrPath, TOKENIZER_CONFIG_FILE);
            if (File.Exists(streamingAssetsPath)) {
                return LoadJsonFromFile<T>(streamingAssetsPath);
            }
            // Resources paths have no extension and must use forward slashes;
            // Path.Combine inserts backslashes on Windows, so normalize them.
            string resourcePath = Path.Combine(pretrainedModelNameOrPath, Path.ChangeExtension(TOKENIZER_CONFIG_FILE, null)).Replace('\\', '/');
            return LoadJsonFromTextAsset<T>(resourcePath);
        }

        /// <summary>
        /// Loads an object of type <typeparamref name="T"/> from a json file
        /// by deserializing using <see cref="JsonConvert"/>.
        /// </summary>
        /// <exception cref="NotImplementedException">Thrown when UNITY_STANDALONE is not defined.</exception>
        /// <exception cref="FileNotFoundException">No file exists at <paramref name="path"/>.</exception>
        private static T LoadJsonFromFile<T>(string path) {
#if !UNITY_STANDALONE
            throw new NotImplementedException();
#else
            // Only compiled when file access is implemented, so no unreachable code remains.
            if (!File.Exists(path)) {
                throw new FileNotFoundException($"The .json file was not found at: '{path}'");
            }
            string json = File.ReadAllText(path);
            T deserializedObject = JsonConvert.DeserializeObject<T>(json);
            return deserializedObject;
#endif
        }

        /// <summary>
        /// Loads an object of type <typeparamref name="T"/> from a text asset in Resources
        /// by deserializing using <see cref="JsonConvert"/>.
/// </summary>
        /// <param name="path">The path to the text file in the Resources folder</param>
        private static T LoadJsonFromTextAsset<T>(string path) {
            TextAsset textAsset = Resources.Load<TextAsset>(path);
            if (textAsset == null) {
                throw new FileNotFoundException($"The TextAsset file was not found at: '{path}'");
            }
            T deserializedObject = JsonConvert.DeserializeObject<T>(textAsset.text);
            Resources.UnloadAsset(textAsset);
            return deserializedObject;
        }

        /// <summary>
        /// Returns a path for the tokenizer.model file in either StreamingAssets or Resources.
        /// A model found in Resources is written to a temporary file first.
        /// </summary>
        /// <exception cref="Exception">No tokenizer model asset could be loaded from Resources.</exception>
        private static string GetTokenizerModelPath(string pretrainedModelNameOrPath) {
            string streamingAssetsPath = Path.Combine(Application.streamingAssetsPath, pretrainedModelNameOrPath, "tokenizer.model");
            if (File.Exists(streamingAssetsPath)) {
                return streamingAssetsPath;
            }
            // Resources paths must use forward slashes; Path.Combine inserts backslashes on Windows.
            string resourcePath = Path.Combine(pretrainedModelNameOrPath, "tokenizer").Replace('\\', '/');
            var tokenizerModel = Resources.Load<TokenizerModelAsset>(resourcePath);
            if (tokenizerModel == null) {
                throw new Exception($"Tokenizer model asset for '{pretrainedModelNameOrPath}' could not be loaded");
            }

            // since a path is required we extract the tokenizer model loaded from Resources and return that path.
            string tmpModelPath = CreateTokenizerModelAtPath(tokenizerModel, pretrainedModelNameOrPath);
            return tmpModelPath;
        }

        /// <summary>
        /// Writes the contents of a tokenizer model to a temporary file and returns the path.
        /// Returns null (after logging an error) when <paramref name="tokenizerModel"/> is null.
        /// </summary>
        private static string CreateTokenizerModelAtPath(TokenizerModelAsset tokenizerModel, string pretrainedModelNameOrPath) {
            if (tokenizerModel == null) {
                UnityEngine.Debug.LogError("Tokenizer model asset is null. Cannot write to temp file.");
                return null;
            }
            string folderPath = Path.Combine(Application.temporaryCachePath, pretrainedModelNameOrPath);
            string tempPath = Path.Combine(folderPath, "tokenizer.model");
            // CreateDirectory does nothing when the folder already exists.
            Directory.CreateDirectory(folderPath);
            File.WriteAllBytes(tempPath, tokenizerModel.ModelData);
            return tempPath;
        }

        /// <summary>
        /// Returns <paramref name="originalFilePath"/> with its file name replaced by <paramref name="newFileName"/>.
        /// </summary>
        private static string ReplaceFileName(string originalFilePath, string newFileName) {
            string directory = Path.GetDirectoryName(originalFilePath);
            string newFilePath = Path.Combine(directory, newFileName);
            return newFilePath;
        }

        /// <summary>
        /// Loads the given tokenizer from a pretrained model vocabulary.
        /// </summary>
        /// <exception cref="NotImplementedException">The configured tokenizer class is not supported.</exception>
        public static PreTrainedTokenizerBase FromPretrained(string pretrainedModelNameOrPath) {
#if UNITY_EDITOR
            OnModelRequested?.Invoke(pretrainedModelNameOrPath);
#endif
            // use the tokenizer_config file to get the specific tokenizer class.
            TokenizerConfig config = LoadTokenizerConfig(pretrainedModelNameOrPath);

            switch (config.TokenizerClass) {
                case "CLIPTokenizer":
                    // NOTE(review): created with null arguments - looks unfinished, confirm.
                    return new ClipTokenizer(null, null);
                case "LlamaTokenizer":
                    string llamaVocabPath = GetTokenizerModelPath(pretrainedModelNameOrPath);
                    return new LlamaTokenizer(llamaVocabPath, config);
                default:
                    throw new NotImplementedException($"'{config.TokenizerClass}' not yet implemented.");
            }
        }
    }
}
--------------------------------------------------------------------------------
/com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/Input.cs:
--------------------------------------------------------------------------------
using Newtonsoft.Json;
using System.Collections;
using System.Collections.Generic;
using System.Text;

namespace Doji.AI.Transformers {

    /// <summary>
    /// Represents input for a tokenizer with explicit types.
/// Inputs can either be a single text, a batch/sequence of text,
    /// pretokenized text, or a sequence of pretokenized texts.
    /// </summary>
    /// <remarks>
    /// string and List&lt;List&lt;string&gt;&gt; have implicit conversions in the
    /// base class because they are not ambiguous. For the others, when
    /// calling methods like <c>Encode</c>
    /// you can disambiguate between sequences of text and pretokenized text
    /// by using the generic version and specifying the type like so:
    /// <code>
    /// tokenizer.Encode&lt;BatchInput&gt;(myList);
    /// tokenizer.Encode&lt;PretokenizedSingleInput&gt;(myList);
    /// </code>
    /// </remarks>
    [JsonConverter(typeof(InputConverter))]
    public abstract class Input {

        /// <summary>
        /// Wraps a text in a <see cref="SingleInput"/>; null stays null.
        /// </summary>
        public static implicit operator Input(string text) {
            if (text == null) { return null; }
            return new SingleInput(text);
        }

        /// <summary>
        /// Wraps pretokenized sequences in a <see cref="PretokenizedBatchInput"/>; null stays null.
        /// </summary>
        public static implicit operator Input(List<List<string>> pretokenizedSequences) {
            if (pretokenizedSequences == null) { return null; }
            return new PretokenizedBatchInput(pretokenizedSequences);
        }

        /// <summary>
        /// Does the input represent a sequence/batch?
        /// </summary>
        public bool IsBatch() {
            return this is BatchInput || this is PretokenizedBatchInput;
        }

        /// <summary>
        /// Is the input already pretokenized?
        /// </summary>
        public bool IsPretokenized() {
            // Both pretokenized input classes derive from PretokenizedInput.
            // The earlier check tested BatchInput, which holds plain text.
            return this is PretokenizedInput;
        }

        public abstract override string ToString();
    }

    /// <summary>
    /// Implemented by inputs that hold a sequence of items.
    /// </summary>
    public interface IBatchInput<T> {
        public IList<T> Sequence { get; set; }
        public int BatchSize {
            get {
                return this.Sequence.Count;
            }
        }
    }

    /// <summary>
    /// Base class for tokenizer inputs that represent text or sequences/batches of text.
/// </summary>
    public abstract class TextInput : Input {

        /// <summary>
        /// Wraps a list of texts in a <see cref="BatchInput"/>; null stays null.
        /// </summary>
        public static implicit operator TextInput(List<string> sequence) {
            if (sequence == null) { return null; }
            return new BatchInput(sequence);
        }

        /// <summary>
        /// Returns the text of a <see cref="SingleInput"/>; null converts to null.
        /// </summary>
        /// <exception cref="System.InvalidCastException">The input is not a <see cref="SingleInput"/>.</exception>
        public static explicit operator string(TextInput textInput) {
            if (textInput == null) {
                // Matches the other conversions, which also pass null through.
                return null;
            }
            if (textInput is SingleInput input) {
                return (string)input;
            } else {
                throw new System.InvalidCastException($"The specified cast from {textInput.GetType()} to string is not valid.");
            }
        }
    }

    /// <summary>
    /// Represents a single text input for the tokenizer.
    /// </summary>
    public class SingleInput : TextInput {

        /// <summary>
        /// The text.
        /// </summary>
        public string Text { get; set; }

        // A null input converts to a null string instead of throwing.
        public static explicit operator string(SingleInput input) => input?.Text;

        public SingleInput(string text) {
            Text = text;
        }

        public override string ToString() {
            return Text;
        }
    }

    /// <summary>
    /// Represents a sequence/batch of inputs for the tokenizer.
    /// </summary>
    public class BatchInput : TextInput, IBatchInput<string> {

        /// <summary>
        /// The sequence of text.
        /// </summary>
        public IList<string> Sequence { get; set; }

        public static implicit operator BatchInput(List<string> sequence) => new BatchInput(sequence);

        /// <summary>
        /// Returns the sequence as a list. The stored list is returned when it already
        /// is a <see cref="List{T}"/>, otherwise the items are copied into a new one.
        /// </summary>
        public static explicit operator List<string>(BatchInput input) {
            if (input?.Sequence == null) {
                return null;
            }
            // Sequence has a public setter, so it may hold any IList; a hard cast could throw.
            return input.Sequence as List<string> ?? new List<string>(input.Sequence);
        }

        public BatchInput(List<string> sequence) {
            Sequence = sequence;
        }

        /// <summary>
        /// Returns all texts, one per line. A null sequence gives an empty string.
        /// </summary>
        public override string ToString() {
            StringBuilder sb = new StringBuilder();
            if (Sequence == null) {
                return sb.ToString();
            }
            foreach (string s in Sequence) {
                sb.AppendLine(s);
            }
            return sb.ToString();
        }
    }

    /// <summary>
    /// Base class for inputs that represent pretokenized input for a tokenizer.
/// </summary>
    public abstract class PretokenizedInput : Input {

        /// <summary>
        /// Wraps a token list in a <see cref="PretokenizedSingleInput"/>; null stays null.
        /// </summary>
        public static implicit operator PretokenizedInput(List<string> pretokenizedText) {
            if (pretokenizedText == null) { return null; }
            return new PretokenizedSingleInput(pretokenizedText);
        }
    }

    /// <summary>
    /// Represents a single pretokenized text.
    /// </summary>
    public class PretokenizedSingleInput : PretokenizedInput {

        /// <summary>
        /// The tokens.
        /// </summary>
        public List<string> PretokenizedText { get; set; }

        // A null input converts to a null list instead of throwing.
        public static explicit operator List<string>(PretokenizedSingleInput input) => input?.PretokenizedText;

        public PretokenizedSingleInput(List<string> pretokenizedText) {
            PretokenizedText = pretokenizedText;
        }

        /// <summary>
        /// Returns all tokens, one per line. A null token list gives an empty string.
        /// </summary>
        public override string ToString() {
            StringBuilder sb = new StringBuilder();
            if (PretokenizedText == null) {
                return sb.ToString();
            }
            foreach (string s in PretokenizedText) {
                sb.AppendLine(s);
            }
            return sb.ToString();
        }
    }

    /// <summary>
    /// Represents a batch of pretokenized texts.
    /// </summary>
    public class PretokenizedBatchInput : PretokenizedInput, IBatchInput<List<string>> {

        /// <summary>
        /// The sequence of tokens for each input.
        /// </summary>
        public IList<List<string>> Sequence { get; set; }

        /// <summary>
        /// Returns the sequence as a list. The stored list is returned when it already
        /// is a <see cref="List{T}"/>, otherwise the items are copied into a new one.
        /// </summary>
        public static explicit operator List<List<string>>(PretokenizedBatchInput input) {
            if (input?.Sequence == null) {
                return null;
            }
            // Sequence has a public setter, so it may hold any IList; a hard cast could throw.
            return input.Sequence as List<List<string>> ?? new List<List<string>>(input.Sequence);
        }

        public PretokenizedBatchInput(List<List<string>> pretokenizedSequences) {
            Sequence = pretokenizedSequences;
        }

        /// <summary>
        /// Returns the tokens of all inputs, one per line. A null sequence gives an empty string.
        /// </summary>
        public override string ToString() {
            StringBuilder sb = new StringBuilder();
            if (Sequence == null) {
                return sb.ToString();
            }
            foreach (List<string> batch in Sequence) {
                foreach (string token in batch) {
                    sb.AppendLine(token);
                }
            }
            return sb.ToString();
        }
    }
}
--------------------------------------------------------------------------------
/com.doji.transformers/Runtime/Scripts/Models/Clip/BasicTokenizer.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using static Doji.AI.Transformers.TokenizationUtils;

namespace Doji.AI.Transformers {

    public class BasicTokenizer {

        /// <summary>
        /// Whether or not to lowercase the input when tokenizing.
        /// </summary>
        private bool _doLowerCase { get; }

        /// <summary>
        /// Collection of tokens which will never be split during tokenization.
        /// </summary>
        private List<string> _neverSplit { get; }

        /// <summary>
        /// Whether or not to tokenize Chinese characters.
        /// </summary>
        private bool _tokenizeChineseChars { get; }

        /// <summary>
        /// Whether or not to strip all accents.
        /// If this option is not specified, then it will be determined
        /// by the value for `lowercase` (as in the original BERT).
        /// </summary>
        private bool? _stripAccents { get; }

        /// <summary>
        /// In some instances we want to skip the basic punctuation splitting
        /// so that later tokenization can capture the full context of the words,
        /// such as contractions.
/// </summary>
        private bool _doSplitOnPunc { get; }

        /// <summary>
        /// Creates a basic tokenizer.
        /// </summary>
        /// <param name="doLowerCase">Lowercase tokens while tokenizing.</param>
        /// <param name="neverSplit">Tokens that are kept as they are; null means none.</param>
        /// <param name="tokenizeChineseChars">Not supported yet; Tokenize throws when true.</param>
        /// <param name="stripAccents">Strip accents; when null this follows <paramref name="doLowerCase"/>.</param>
        /// <param name="doSplitOnPunc">Split tokens on punctuation.</param>
        public BasicTokenizer(
            bool doLowerCase = true,
            List<string> neverSplit = null,
            bool tokenizeChineseChars = false,
            bool? stripAccents = null,
            bool doSplitOnPunc = true
        ) {
            _doLowerCase = doLowerCase;
            _neverSplit = neverSplit ?? new List<string>();
            _tokenizeChineseChars = tokenizeChineseChars;
            _stripAccents = stripAccents;
            _doSplitOnPunc = doSplitOnPunc;
        }

        /// <summary>
        /// Cleans the text, splits it on whitespace, optionally lowercases and strips
        /// accents, and splits on punctuation. Tokens listed in the never-split
        /// collection are passed through unchanged.
        /// </summary>
        /// <exception cref="NotImplementedException">Chinese character tokenization was requested.</exception>
        public List<string> Tokenize(string text) {
            text = CleanText(text);

            //TODO: implement tokenize chinese chars
            if (_tokenizeChineseChars) {
                throw new NotImplementedException("BasicTokenizer currently does not support chinese characters.");
            }

            // prevents treating the same character with different unicode codepoints as different characters
            string unicodeNormalized = text.Normalize(NormalizationForm.FormC);
            List<string> origTokens = WhitespaceTokenize(unicodeNormalized);
            List<string> splitTokens = new List<string>();

            foreach (string token in origTokens) {
                string processedToken = token;
                if (!_neverSplit.Contains(token)) {
                    if (_doLowerCase) {
                        // Invariant casing keeps the result independent of the current culture.
                        processedToken = token.ToLowerInvariant();
                        if (_stripAccents != false) {
                            processedToken = RunStripAccents(processedToken);
                        }
                    } else if (_stripAccents == true) {
                        processedToken = RunStripAccents(processedToken);
                    }
                }
                // This has to run for every token. Never-split tokens skip the normalization
                // above, and RunSplitOnPunc returns them unsplit. With the call inside the
                // `if`, those tokens were dropped from the output.
                splitTokens.AddRange(RunSplitOnPunc(processedToken, _neverSplit));
            }

            var outputTokens = WhitespaceTokenize(string.Join(" ", splitTokens));
            return outputTokens;
        }

        /// <summary>
        /// Strips accents from a piece of text.
/// </summary>
        private static string RunStripAccents(string text) {
            // Decompose first so that accents become separate non-spacing marks.
            string decomposed = text.Normalize(NormalizationForm.FormD);
            var kept = new StringBuilder();

            foreach (char ch in decomposed) {
                if (CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark) {
                    kept.Append(ch);
                }
            }

            return kept.ToString();
        }

        /// <summary>
        /// Splits a piece of text on punctuation. Each punctuation character becomes
        /// its own piece; the text is returned as a single piece when splitting is
        /// disabled or the text is listed in <paramref name="neverSplit"/>.
        /// </summary>
        private List<string> RunSplitOnPunc(string text, List<string> neverSplit = null) {
            bool keepWhole = !_doSplitOnPunc || (neverSplit != null && neverSplit.Contains(text));
            if (keepWhole) {
                return new List<string> { text };
            }

            var pieces = new List<StringBuilder>();
            bool beginPiece = true;

            foreach (char ch in text) {
                if (IsPunctuation(ch)) {
                    // Punctuation stands alone and ends the current word.
                    pieces.Add(new StringBuilder().Append(ch));
                    beginPiece = true;
                    continue;
                }

                if (beginPiece) {
                    pieces.Add(new StringBuilder());
                    beginPiece = false;
                }
                pieces[pieces.Count - 1].Append(ch);
            }

            return pieces.ConvertAll(piece => piece.ToString());
        }

        /// <summary>
        /// Removes NUL, U+FFFD and control characters from the text and replaces
        /// every whitespace character with a single space.
        /// </summary>
        private static string CleanText(string text) {
            var cleaned = new StringBuilder();

            foreach (char ch in text) {
                bool isInvalid = ch == '\0' || ch == '\uFFFD' || IsControl(ch);
                if (isInvalid) {
                    continue;
                }

                cleaned.Append(IsWhitespace(ch) ? ' ' : ch);
            }

            return cleaned.ToString();
        }

        /// <summary>
        /// Runs basic whitespace cleaning and splitting on a piece of text.
///
/// <param name="text">The text to split; may be <c>null</c> or empty.</param>
/// <returns>
/// The whitespace-separated tokens of <paramref name="text"/>. Never contains empty
/// strings; the list is empty when the text is <c>null</c>, empty or only whitespace.
/// </returns>
private static List<string> WhitespaceTokenize(string text) {
    if (string.IsNullOrWhiteSpace(text)) {
        return new List<string>();
    }

    // A null separator array splits on any whitespace character. RemoveEmptyEntries
    // drops the empty strings that leading, trailing or consecutive whitespace
    // would otherwise produce.
    string[] tokens = text.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
    return new List<string>(tokens);
}
}
}
-------------------------------------------------------------------------------- /projects/Transformers/ProjectSettings/InputManager.asset: -------------------------------------------------------------------------------- 1 | %YAML 1.1 2 | %TAG !u! tag:unity3d.com,2011: 3 | --- !u!13 &1 4 | InputManager: 5 | m_ObjectHideFlags: 0 6 | serializedVersion: 2 7 | m_Axes: 8 | - serializedVersion: 3 9 | m_Name: Horizontal 10 | descriptiveName: 11 | descriptiveNegativeName: 12 | negativeButton: left 13 | positiveButton: right 14 | altNegativeButton: a 15 | altPositiveButton: d 16 | gravity: 3 17 | dead: 0.001 18 | sensitivity: 3 19 | snap: 1 20 | invert: 0 21 | type: 0 22 | axis: 0 23 | joyNum: 0 24 | - serializedVersion: 3 25 | m_Name: Vertical 26 | descriptiveName: 27 | descriptiveNegativeName: 28 | negativeButton: down 29 | positiveButton: up 30 | altNegativeButton: s 31 | altPositiveButton: w 32 | gravity: 3 33 | dead: 0.001 34 | sensitivity: 3 35 | snap: 1 36 | invert: 0 37 | type: 0 38 | axis: 0 39 | joyNum: 0 40 | - serializedVersion: 3 41 | m_Name: Fire1 42 | descriptiveName: 43 | descriptiveNegativeName: 44 | negativeButton: 45 | positiveButton: left ctrl 46 | altNegativeButton: 47 | altPositiveButton: mouse 0 48 | gravity: 1000 49 | dead: 0.001 50 | sensitivity: 1000 51 | snap: 0 52 | invert: 0 53 | type: 0 54 | axis: 0 55 | joyNum: 0 56 | - serializedVersion: 3 57 | m_Name: Fire2 58 | descriptiveName: 59 | descriptiveNegativeName: 60 | negativeButton: 61 | positiveButton: left alt 62 | altNegativeButton: 63 | altPositiveButton: mouse 1 64 | gravity: 1000 65 | dead: 0.001 66 | sensitivity: 1000 67 | snap: 0 68 | invert: 0 69 | type: 0 70 | axis: 0 71 | joyNum: 0 72 | - serializedVersion: 3 73 | 
m_Name: Fire3 74 | descriptiveName: 75 | descriptiveNegativeName: 76 | negativeButton: 77 | positiveButton: left shift 78 | altNegativeButton: 79 | altPositiveButton: mouse 2 80 | gravity: 1000 81 | dead: 0.001 82 | sensitivity: 1000 83 | snap: 0 84 | invert: 0 85 | type: 0 86 | axis: 0 87 | joyNum: 0 88 | - serializedVersion: 3 89 | m_Name: Jump 90 | descriptiveName: 91 | descriptiveNegativeName: 92 | negativeButton: 93 | positiveButton: space 94 | altNegativeButton: 95 | altPositiveButton: 96 | gravity: 1000 97 | dead: 0.001 98 | sensitivity: 1000 99 | snap: 0 100 | invert: 0 101 | type: 0 102 | axis: 0 103 | joyNum: 0 104 | - serializedVersion: 3 105 | m_Name: Mouse X 106 | descriptiveName: 107 | descriptiveNegativeName: 108 | negativeButton: 109 | positiveButton: 110 | altNegativeButton: 111 | altPositiveButton: 112 | gravity: 0 113 | dead: 0 114 | sensitivity: 0.1 115 | snap: 0 116 | invert: 0 117 | type: 1 118 | axis: 0 119 | joyNum: 0 120 | - serializedVersion: 3 121 | m_Name: Mouse Y 122 | descriptiveName: 123 | descriptiveNegativeName: 124 | negativeButton: 125 | positiveButton: 126 | altNegativeButton: 127 | altPositiveButton: 128 | gravity: 0 129 | dead: 0 130 | sensitivity: 0.1 131 | snap: 0 132 | invert: 0 133 | type: 1 134 | axis: 1 135 | joyNum: 0 136 | - serializedVersion: 3 137 | m_Name: Mouse ScrollWheel 138 | descriptiveName: 139 | descriptiveNegativeName: 140 | negativeButton: 141 | positiveButton: 142 | altNegativeButton: 143 | altPositiveButton: 144 | gravity: 0 145 | dead: 0 146 | sensitivity: 0.1 147 | snap: 0 148 | invert: 0 149 | type: 1 150 | axis: 2 151 | joyNum: 0 152 | - serializedVersion: 3 153 | m_Name: Horizontal 154 | descriptiveName: 155 | descriptiveNegativeName: 156 | negativeButton: 157 | positiveButton: 158 | altNegativeButton: 159 | altPositiveButton: 160 | gravity: 0 161 | dead: 0.19 162 | sensitivity: 1 163 | snap: 0 164 | invert: 0 165 | type: 2 166 | axis: 0 167 | joyNum: 0 168 | - serializedVersion: 3 169 | m_Name: 
Vertical 170 | descriptiveName: 171 | descriptiveNegativeName: 172 | negativeButton: 173 | positiveButton: 174 | altNegativeButton: 175 | altPositiveButton: 176 | gravity: 0 177 | dead: 0.19 178 | sensitivity: 1 179 | snap: 0 180 | invert: 1 181 | type: 2 182 | axis: 1 183 | joyNum: 0 184 | - serializedVersion: 3 185 | m_Name: Fire1 186 | descriptiveName: 187 | descriptiveNegativeName: 188 | negativeButton: 189 | positiveButton: joystick button 0 190 | altNegativeButton: 191 | altPositiveButton: 192 | gravity: 1000 193 | dead: 0.001 194 | sensitivity: 1000 195 | snap: 0 196 | invert: 0 197 | type: 0 198 | axis: 0 199 | joyNum: 0 200 | - serializedVersion: 3 201 | m_Name: Fire2 202 | descriptiveName: 203 | descriptiveNegativeName: 204 | negativeButton: 205 | positiveButton: joystick button 1 206 | altNegativeButton: 207 | altPositiveButton: 208 | gravity: 1000 209 | dead: 0.001 210 | sensitivity: 1000 211 | snap: 0 212 | invert: 0 213 | type: 0 214 | axis: 0 215 | joyNum: 0 216 | - serializedVersion: 3 217 | m_Name: Fire3 218 | descriptiveName: 219 | descriptiveNegativeName: 220 | negativeButton: 221 | positiveButton: joystick button 2 222 | altNegativeButton: 223 | altPositiveButton: 224 | gravity: 1000 225 | dead: 0.001 226 | sensitivity: 1000 227 | snap: 0 228 | invert: 0 229 | type: 0 230 | axis: 0 231 | joyNum: 0 232 | - serializedVersion: 3 233 | m_Name: Jump 234 | descriptiveName: 235 | descriptiveNegativeName: 236 | negativeButton: 237 | positiveButton: joystick button 3 238 | altNegativeButton: 239 | altPositiveButton: 240 | gravity: 1000 241 | dead: 0.001 242 | sensitivity: 1000 243 | snap: 0 244 | invert: 0 245 | type: 0 246 | axis: 0 247 | joyNum: 0 248 | - serializedVersion: 3 249 | m_Name: Submit 250 | descriptiveName: 251 | descriptiveNegativeName: 252 | negativeButton: 253 | positiveButton: return 254 | altNegativeButton: 255 | altPositiveButton: joystick button 0 256 | gravity: 1000 257 | dead: 0.001 258 | sensitivity: 1000 259 | snap: 0 260 | 
invert: 0 261 | type: 0 262 | axis: 0 263 | joyNum: 0 264 | - serializedVersion: 3 265 | m_Name: Submit 266 | descriptiveName: 267 | descriptiveNegativeName: 268 | negativeButton: 269 | positiveButton: enter 270 | altNegativeButton: 271 | altPositiveButton: space 272 | gravity: 1000 273 | dead: 0.001 274 | sensitivity: 1000 275 | snap: 0 276 | invert: 0 277 | type: 0 278 | axis: 0 279 | joyNum: 0 280 | - serializedVersion: 3 281 | m_Name: Cancel 282 | descriptiveName: 283 | descriptiveNegativeName: 284 | negativeButton: 285 | positiveButton: escape 286 | altNegativeButton: 287 | altPositiveButton: joystick button 1 288 | gravity: 1000 289 | dead: 0.001 290 | sensitivity: 1000 291 | snap: 0 292 | invert: 0 293 | type: 0 294 | axis: 0 295 | joyNum: 0 296 | m_UsePhysicalKeys: 1 297 | -------------------------------------------------------------------------------- /com.doji.transformers/Tests/Editor/ClipTokenizerTest.cs: -------------------------------------------------------------------------------- 1 | using NUnit.Framework; 2 | using System.Collections; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | 6 | namespace Doji.AI.Transformers.Editor.Tests { 7 | 8 | /// 9 | /// ClipTokenizer test with a reduced vocabulary. 
///
public class ClipTokenizerTest {

    // NOTE(review): this listing appears to have lost everything that was written between
    // angle brackets (generic type arguments such as the element type of List, and parts of
    // string literals). The code below is kept exactly as found; confirm against the
    // repository before relying on it.

    /// <summary>
    /// Test cases for <see cref="TestTokenize"/>: an input text and the token strings
    /// the tokenizer is expected to return for it.
    /// </summary>
    public static IEnumerable TokenizeTestData {
        get {
            yield return new TestCaseData("lower newer").Returns(new List() { "lo", "w", "er", "n", "e", "w", "er" });
            yield return new TestCaseData("lone loner").Returns(new List() { "lo", "n", "e", "lo", "n", "er" });
            yield return new TestCaseData("new low never hover").Returns(new List() { "n", "e", "w", "low", "n", "e", "v", "er", "h", "o", "v", "er" });
        }
    }

    /// <summary>
    /// Test cases for <see cref="TestEncode"/>: an input text and the expected ids.
    /// Every expected sequence starts with 21 and ends with 22, the ids that
    /// <see cref="VocabTokens"/> assigns to "&lt;|startoftext|&gt;" and "&lt;|endoftext|&gt;".
    /// </summary>
    public static IEnumerable EncodeTestData {
        get {
            yield return new TestCaseData("lower newer").Returns(new List() { 21, 10, 2, 16, 9, 3, 2, 16, 22 });
            yield return new TestCaseData("lone loner").Returns(new List() { 21, 10, 9, 20, 10, 9, 16, 22 });
            yield return new TestCaseData("new low never hover").Returns(new List() { 21, 9, 3, 12, 15, 9, 3, 20, 16, 20, 1, 20, 16, 22 });
        }
    }

    // Prompts encoded together by TestEncodeBatch.
    private static List BatchInput = new List() { "lower newer", "lone loner", "new low never hover" };
    // Prompts used by TestRoundtrip (encode, then decode).
    private static List RoundtripInput = new List() { "lower newer" };

    /// <summary>
    /// Test cases for <see cref="TestEncodeBatch"/>: the batch of prompts, a padding mode
    /// and the expected ids per prompt. Padded positions are filled with 22.
    /// </summary>
    public static IEnumerable EncodeBatchTestData {
        get {
            // No padding: each sequence keeps its own length.
            yield return new TestCaseData(BatchInput, Padding.None).Returns(
                new List>() {
                    new List { 21, 10, 2, 16, 9, 3, 2, 16, 22 },
                    new List { 21, 10, 9, 20, 10, 9, 16, 22 },
                    new List { 21, 9, 3, 12, 15, 9, 3, 20, 16, 20, 1, 20, 16, 22 }
                }
            );
            // Padding.Longest: every sequence has the length of the longest one (14 ids).
            yield return new TestCaseData(BatchInput, Padding.Longest).Returns(
                new List>() {
                    new List { 21, 10, 2, 16, 9, 3, 2, 16, 22, 22, 22, 22, 22, 22 },
                    new List { 21, 10, 9, 20, 10, 9, 16, 22, 22, 22, 22, 22, 22, 22 },
                    new List { 21, 9, 3, 12, 15, 9, 3, 20, 16, 20, 1, 20, 16, 22 }
                }
            );
            // Padding.MaxLength: every sequence has 77 ids, the maxLength that
            // TestEncodeBatch passes to Encode.
            yield return new TestCaseData(BatchInput, Padding.MaxLength).Returns(
                new List>() {
                    new List {
                        21, 10, 2, 16, 9, 3, 2, 16, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22
                    },
                    new List {
                        21, 10, 9, 20, 10, 9, 16, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22
                    },
                    new List {
                        21, 9, 3, 12, 15, 9, 3, 20, 16, 20, 1, 20, 16, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
                        22, 22, 22, 22
                    }
                }
            );
        }
    }

    // Reduced vocabulary (token text -> id) handed to the Vocab in CreateTokenizer.
    // NOTE(review): the keys "l", "w", "r" and "t" each appear twice (ids 0/11, 2/12, 4/13,
    // 6/14) and id 20 has an empty key. A dictionary initializer with a repeated key would
    // throw at run time, so the second occurrences presumably carried a suffix that was lost
    // from this listing - verify against the repository.
    private static Dictionary VocabTokens = new Dictionary() {
        { "l", 0 }, { "o", 1 }, { "w", 2 }, { "e", 3 }, { "r", 4 }, { "s", 5 }, { "t", 6 }, { "i", 7 }, { "d", 8 }, { "n", 9 },
        { "lo", 10 }, { "l", 11 }, { "w", 12 }, { "r", 13 }, { "t", 14 }, { "low", 15 }, { "er", 16 },
        { "lowest", 17 }, { "newer", 18 }, { "wider", 19 }, { "", 20 }, { "<|startoftext|>", 21 }, { "<|endoftext|>", 22 }
    };

    // Merges text handed to the ClipTokenizer in CreateTokenizer: a version header line
    // followed by one space-separated pair per line.
    // NOTE(review): the pairs may have lost a suffix in this listing as well - verify.
    private static string Merges = "#version: 0.2\n" + "l o\n" + "lo w\n" + "e r\n";

    /// <summary>
    /// Tokenizes <paramref name="text"/>; NUnit compares the returned tokens with the
    /// expected value of the current <see cref="TokenizeTestData"/> case.
    /// </summary>
    [Test]
    [TestCaseSource(nameof(TokenizeTestData))]
    public List TestTokenize(string text) {
        ClipTokenizer t = CreateTokenizer();
        List tokens = t.Tokenize(text);
        return tokens;
    }

    /// <summary>
    /// Checks the shape of a single encoding: it has an "input_ids" entry, that entry is a
    /// collection, and the encoding object is an InputEncoding.
    /// </summary>
    [Test]
    public void TestEncodeType() {
        ClipTokenizer t = CreateTokenizer();
        var encoding = t.Encode("lower newer");

        Assert.IsTrue(encoding.ContainsKey("input_ids"), "Encoded ids not found in 'input_ids'.");
        object encodedIds = encoding["input_ids"];
        Assert.IsTrue(encodedIds is ICollection, "Unexpected type for encoded text.");
        Assert.IsTrue(encoding is InputEncoding, "Unexpected type for encoding.");
    }

    /// <summary>
    /// Encodes <paramref name="text"/>; NUnit compares the returned ids with the expected
    /// value of the current <see cref="EncodeTestData"/> case.
    /// </summary>
    [Test]
    [TestCaseSource(nameof(EncodeTestData))]
    public IEnumerable TestEncode(string text) {
        ClipTokenizer t = CreateTokenizer();
        // 'as' yields null if Encode returns a different type; the next line would then throw.
        InputEncoding encoding = t.Encode(text) as InputEncoding;
        var encodedIds = encoding.InputIds;
        return encodedIds;
    }

    /// <summary>
    /// Encodes a batch of prompts with the given padding mode and a maximum length of 77;
    /// NUnit compares the returned ids with the current <see cref="EncodeBatchTestData"/> case.
    /// </summary>
    [Test]
    [TestCaseSource(nameof(EncodeBatchTestData))]
    public List> TestEncodeBatch(List prompts, Padding padding) {
        ClipTokenizer t = CreateTokenizer();
        BatchEncoding encoding = t.Encode(prompts, padding: padding, maxLength: 77) as BatchEncoding;
        var encodedIds = encoding["input_ids"] as List>;
        return encodedIds;
    }

    /// <summary>
    /// Encoding a prompt and decoding the ids (skipping special tokens) must give back
    /// the original prompt.
    /// </summary>
    [Test]
    public void TestRoundtrip([ValueSource(nameof(RoundtripInput))] string prompt) {
        ClipTokenizer t = CreateTokenizer();
        var result = t.Decode(t.Encode(prompt).InputIds.ToList(), skipSpecialTokens: true);
        Assert.That(result, Is.EqualTo(prompt));
    }

    ///
    /// Creates a basic ClipTokenizer with a reduced vocabulary.
///
/// <returns>A ClipTokenizer built from VocabTokens, Merges and a fresh TokenizerConfig.</returns>
private ClipTokenizer CreateTokenizer() {
    Vocab vocab = new Vocab(VocabTokens);
    TokenizerConfig config = new TokenizerConfig();
    // NOTE(review): the unknown token is the empty string here, matching the empty key
    // (id 20) in VocabTokens; its original text may be missing from this listing - verify.
    config.UnkToken = "";
    ClipTokenizer tokenizer = new ClipTokenizer(vocab, Merges, config);
    return tokenizer;
}
}
}
-------------------------------------------------------------------------------- /com.doji.transformers/Runtime/Scripts/TokenizationUtilsBase/SpecialTokensMixin.cs: --------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;

namespace Doji.AI.Transformers {

    /// <summary>
    /// Special-token part of <see cref="PreTrainedTokenizerBase"/>: holds the special tokens
    /// (bos, eos, unk, sep, pad, cls, mask and additional ones) and exposes their ids.
    /// </summary>
    public abstract partial class PreTrainedTokenizerBase : ISpecialTokensMixin {

        // The special tokens themselves. InitializeSpecialTokensMixin copies them from Config;
        // a token that is not configured stays null.
        public Token BosToken { get; set; }
        public Token EosToken { get; set; }
        public Token UnkToken { get; set; }
        public Token SepToken { get; set; }
        public Token PadToken { get; set; }
        public Token ClsToken { get; set; }
        public Token MaskToken { get; set; }
        // NOTE(review): the element type of this list is not visible in this listing.
        public List AdditionalSpecialTokens { get; set; }

        // Each *TokenId getter below follows the same pattern: null when the token is not set,
        // otherwise the id that ConvertTokensToIds returns for it. The setters are not
        // implemented and always throw NotImplementedException.
        // NOTE(review): a Token is passed to ConvertTokensToIds, whose visible overloads take a
        // string or a list - presumably Token converts to string; confirm in Token.cs.

        /// <summary>Id of <see cref="BosToken"/>, or null when it is not set.</summary>
        public int? BosTokenId {
            get {
                if (BosToken == null) return null;
                return ConvertTokensToIds(BosToken);
            }
            set {
                throw new NotImplementedException();
            }
        }

        /// <summary>Id of <see cref="EosToken"/>, or null when it is not set.</summary>
        public int? EosTokenId {
            get {
                if (EosToken == null)
                    return null;
                return ConvertTokensToIds(EosToken);
            }
            set {
                throw new NotImplementedException();
            }
        }

        /// <summary>Id of <see cref="UnkToken"/>, or null when it is not set.</summary>
        public int? UnkTokenId {
            get {
                if (UnkToken == null)
                    return null;
                return ConvertTokensToIds(UnkToken);
            }
            set {
                throw new NotImplementedException();
            }
        }

        /// <summary>Id of <see cref="SepToken"/>, or null when it is not set.</summary>
        public int? SepTokenId {
            get {
                if (SepToken == null)
                    return null;
                return ConvertTokensToIds(SepToken);
            }
            set {
                throw new NotImplementedException();
            }
        }

        /// <summary>Id of <see cref="PadToken"/>, or null when it is not set.</summary>
        public int? PadTokenId {
            get {
                if (PadToken == null)
                    return null;
                return ConvertTokensToIds(PadToken);
            }
            set {
                throw new NotImplementedException();
            }
        }

        /// <summary>Set to 0 by InitializeSpecialTokensMixin; not changed anywhere else in this part.</summary>
        public int PadTokenTypeID { get; private set; }

        /// <summary>Id of <see cref="ClsToken"/>, or null when it is not set.</summary>
        public int? ClsTokenId {
            get {
                if (ClsToken == null)
                    return null;
                return ConvertTokensToIds(ClsToken);
            }
            set {
                throw new NotImplementedException();
            }
        }

        /// <summary>Id of <see cref="MaskToken"/>, or null when it is not set.</summary>
        public int? MaskTokenId {
            get {
                if (MaskToken == null)
                    return null;
                return ConvertTokensToIds(MaskToken);
            }
            set {
                throw new NotImplementedException();
            }
        }

        /// <summary>Not implemented: both the getter and the setter throw NotImplementedException.</summary>
        public List AdditionalSpecialTokensIds { get => throw new NotImplementedException(); set => throw new NotImplementedException(); }

        /// <summary>
        /// A list of the unique special tokens as strings: every entry of
        /// <see cref="AllSpecialTokensExtended"/> converted with ToString().
        /// </summary>
        public List AllSpecialTokens {
            get {
                List allToks = new List();
                foreach (var s in AllSpecialTokensExtended) {
                    allToks.Add(s.ToString());
                }
                return allToks;
            }
        }

        ///
        /// List the ids of the special tokens (the entries of AllSpecialTokens) mapped to class attributes.
///
/// <remarks>Computed on every access by passing AllSpecialTokens to ConvertTokensToIds.</remarks>
public List AllSpecialIds {
    get {
        return ConvertTokensToIds(AllSpecialTokens);
    }
}

// Implemented by concrete tokenizers: map a list of tokens, or a single token, to ids.
// NOTE(review): the element types of the list overload are not visible in this listing.
protected abstract List ConvertTokensToIds(List tokens);
protected abstract int ConvertTokensToIds(string tokens);

/// <summary>Not implemented: always throws NotImplementedException.</summary>
public int AddSpecialTokens(Dictionary specialTokensDict, bool replaceAdditionalSpecialTokens = true) {
    throw new System.NotImplementedException();
}

/// <summary>Not implemented: always throws NotImplementedException.</summary>
public int AddTokens(string newTokens) {
    throw new System.NotImplementedException();
}

/// <summary>Not implemented: always throws NotImplementedException.</summary>
public int AddTokens(AddedToken newTokens) {
    throw new System.NotImplementedException();
}

/// <summary>Not implemented: always throws NotImplementedException.</summary>
public int AddTokens(List newTokens) {
    throw new System.NotImplementedException();
}

// Explicit interface implementation; not implemented, reading it throws NotImplementedException.
HashSet ISpecialTokensMixin.SpecialTokensMap => throw new NotImplementedException();

/// <summary>
/// The special tokens that are currently set (`cls_token`, `unk_token`, etc.):
/// BosToken, EosToken, UnkToken, SepToken, PadToken, ClsToken and MaskToken, with
/// null entries left out. AdditionalSpecialTokens is not included here.
/// </summary>
/// <remarks>A new set is built on every access.</remarks>
public HashSet SpecialTokensMapExtended {
    get {
        var tokens = new HashSet(
            new Token[] {
                BosToken,
                EosToken,
                UnkToken,
                SepToken,
                PadToken,
                ClsToken,
                MaskToken,
            }.Where(value => value != null)
        );
        return tokens;
    }
}

///
/// All the special tokens (unknown, classification, etc.); the order has nothing to do
/// with the index of each token. If you want to know the correct indices, check
/// the member this comment originally referred to (the reference is missing in this
/// listing - TODO restore it).
///
/// Don't convert tokens of `tokenizers.AddedToken` type to string so they can be used
/// to control more finely how special tokens are tokenized.
///
/// <remarks>
/// Built on every access: first the tokens of SpecialTokensMapExtended, then the entries of
/// AdditionalSpecialTokens that were not already collected.
/// </remarks>
public List AllSpecialTokensExtended {
    get {
        List allTokens = new List();
        HashSet seen = new HashSet();

        foreach (Token token in SpecialTokensMapExtended) {
            if (!seen.Contains(token)) {
                allTokens.Add(token);
                seen.Add(token);
            }
        }

        // assumes AdditionalSpecialTokens is non-null, i.e. InitializeSpecialTokensMixin has
        // already run - TODO confirm for all callers.
        // NOTE(review): Where is deferred, so this query runs twice (AddRange, then UnionWith),
        // each time against the current contents of 'seen'. 'seen' is not updated while
        // AddRange runs, so an entry repeated inside AdditionalSpecialTokens is added once
        // per occurrence.
        var tokensToAdd = AdditionalSpecialTokens.Where(token => !seen.Contains(token));
        allTokens.AddRange(tokensToAdd);
        seen.UnionWith(tokensToAdd);

        return allTokens;
    }
}

/// <summary>
/// Copies the special tokens from Config into the corresponding properties, resets
/// PadTokenTypeID to 0 and sets AdditionalSpecialTokens.
/// </summary>
/// <param name="additionalSpecialTokens">
/// Extra special tokens; the list is stored as-is (not copied). When null, an empty list is used.
/// </param>
public void InitializeSpecialTokensMixin(List additionalSpecialTokens = null) {
    BosToken = Config.BosToken;
    EosToken = Config.EosToken;
    UnkToken = Config.UnkToken;
    SepToken = Config.SepToken;
    PadToken = Config.PadToken;
    ClsToken = Config.ClsToken;
    MaskToken = Config.MaskToken;
    PadTokenTypeID = 0;

    if (additionalSpecialTokens != null) {
        AdditionalSpecialTokens = additionalSpecialTokens;
    } else {
        AdditionalSpecialTokens = new List();
    }
}
}
}

--------------------------------------------------------------------------------