├── .github ├── ISSUE_TEMPLATE │ ├── 1_bug-report.yml │ ├── 2_feature_request.yml │ ├── 3_question.yml │ └── config.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── changelog.yml │ ├── docs.yml │ └── release.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── VERSION ├── bin └── transformers ├── composer.json ├── docs ├── .gitignore ├── .vitepress │ └── config.mts ├── README.md ├── audio-classification.md ├── automatic-speech-recognition.md ├── basic-usage.md ├── bun.lockb ├── configuration.md ├── feature-extraction.md ├── fill-mask.md ├── getting-started.md ├── image-classification.md ├── image-feature-extraction.md ├── image-to-image.md ├── image-to-text.md ├── images │ └── detection-example.jpg ├── index.md ├── introduction.md ├── models.md ├── object-detection.md ├── package.json ├── pipelines.md ├── question-answering.md ├── summarization.md ├── text-classification.md ├── text-generation.md ├── text-to-text-generation.md ├── token-classification.md ├── tokenizers.md ├── translation.md ├── utils │ ├── generation.md │ ├── image.md │ └── tensor.md ├── zero-shot-classification.md ├── zero-shot-image-classification.md └── zero-shot-object-detection.md ├── examples ├── .gitignore ├── bootstrap.php ├── composer.json ├── misc │ ├── background-removal.php │ ├── custom-object-detection.php │ ├── general-test.php │ └── image-test.php ├── pipelines │ ├── asr.php │ ├── audio-classification.php │ ├── feature-extraction.php │ ├── fill-mask.php │ ├── image-classification.php │ ├── image-feature-extraction.php │ ├── image-to-image.php │ ├── image-to-text.php │ ├── object-detection.php │ ├── question-answering.php │ ├── sentiment-analysis.php │ ├── summarization.php │ ├── text-classification.php │ ├── text-generation.php │ ├── text2text-generation.php │ ├── token-classification.php │ ├── translation.php │ ├── zero-shot-classification.php │ ├── zero-shot-image-classification.php │ └── zero-shot-object-detection.php └── tokenizers │ └── apply-chat-template.php 
├── libs └── .gitignore ├── phpunit.xml ├── scripts ├── convert.py ├── convert_upload_hf.ipynb └── requirements.txt ├── src ├── Commands │ ├── DownloadModelCommand.php │ └── InstallCommand.php ├── DataStructures │ ├── CharTrie.php │ ├── CharTrieNode.php │ ├── TokenLattice.php │ └── TokenLatticeNode.php ├── Decoders │ ├── BPEDecoder.php │ ├── ByteFallback.php │ ├── ByteLevelDecoder.php │ ├── CTCDecoder.php │ ├── Decoder.php │ ├── DecoderSequence.php │ ├── FuseDecoder.php │ ├── MetaspaceDecoder.php │ ├── ReplaceDecoder.php │ ├── StripDecoder.php │ ├── VitsDecoder.php │ └── WordPieceDecoder.php ├── Exceptions │ ├── HubException.php │ ├── MissingModelInputException.php │ ├── ModelExecutionException.php │ ├── TemplateParseException.php │ ├── TransformersException.php │ ├── UnsupportedModelTypeException.php │ └── UnsupportedTaskException.php ├── FFI │ ├── Libc.php │ ├── OnnxRuntime.php │ ├── Samplerate.php │ ├── Sndfile.php │ └── TransformersUtils.php ├── FeatureExtractors │ ├── ASTFeatureExtractor.php │ ├── DetrFeatureExtractor.php │ ├── FeatureExtractor.php │ ├── ImageFeatureExtractor.php │ ├── OwlViTFeatureExtractor.php │ ├── Owlv2ImageProcessor.php │ ├── Swin2SRImageProcessor.php │ ├── ViTFeatureExtractor.php │ ├── Wav2Vec2FeatureExtractor.php │ └── WhisperFeatureExtractor.php ├── Generation │ ├── AggregationStrategy.php │ ├── LogitsProcessors │ │ ├── BadWordsLogitsProcessor.php │ │ ├── ForceTokensLogitsProcessor.php │ │ ├── ForcedBOSTokenLogitsProcessor.php │ │ ├── ForcedEOSTokenLogitsProcessor.php │ │ ├── LogitsProcessor.php │ │ ├── LogitsProcessorList.php │ │ ├── MinLengthLogitsProcessor.php │ │ ├── MinNewTokensLengthLogitsProcessor.php │ │ ├── NoRepeatNGramLogitsProcessor.php │ │ ├── RepetitionPenaltyLogitsProcessor.php │ │ ├── SuppressTokensAtBeginLogitsProcessor.php │ │ └── WhisperTimeStampLogitsProcessor.php │ ├── Samplers │ │ ├── BeamSearchSampler.php │ │ ├── GreedySampler.php │ │ ├── MultinomialSampler.php │ │ └── Sampler.php │ └── Streamers │ │ ├── 
StdOutStreamer.php │ │ ├── StreamMode.php │ │ ├── Streamer.php │ │ ├── TextStreamer.php │ │ └── WhisperTextStreamer.php ├── Models │ ├── Auto │ │ ├── AutoModel.php │ │ ├── AutoModelForAudioClassification.php │ │ ├── AutoModelForCTC.php │ │ ├── AutoModelForCausalLM.php │ │ ├── AutoModelForImageClassification.php │ │ ├── AutoModelForImageFeatureExtraction.php │ │ ├── AutoModelForImageToImage.php │ │ ├── AutoModelForMaskedLM.php │ │ ├── AutoModelForObjectDetection.php │ │ ├── AutoModelForQuestionAnswering.php │ │ ├── AutoModelForSeq2SeqLM.php │ │ ├── AutoModelForSequenceClassification.php │ │ ├── AutoModelForSpeechSeq2Seq.php │ │ ├── AutoModelForTokenClassification.php │ │ ├── AutoModelForVision2Seq.php │ │ ├── AutoModelForZeroShotObjectDetection.php │ │ └── PretrainedMixin.php │ ├── ModelArchitecture.php │ ├── Output │ │ ├── BaseModelOutput.php │ │ ├── CasualLMOutput.php │ │ ├── DetrSegmentationOutput.php │ │ ├── MaskedLMOutput.php │ │ ├── ModelOutput.php │ │ ├── ObjectDetectionOutput.php │ │ ├── QuestionAnsweringModelOutput.php │ │ ├── SequenceClassifierOutput.php │ │ └── TokenClassifierOutput.php │ └── Pretrained │ │ ├── ASTForAudioClassification.php │ │ ├── ASTModel.php │ │ ├── ASTPretrainedModel.php │ │ ├── AlbertForMaskedLM.php │ │ ├── AlbertForQuestionAnswering.php │ │ ├── AlbertForSequenceClassification.php │ │ ├── AlbertModel.php │ │ ├── AlbertPretrainedModel.php │ │ ├── BartForConditionalGeneration.php │ │ ├── BartForSequenceClassification.php │ │ ├── BartModel.php │ │ ├── BartPretrainedModel.php │ │ ├── BertForMaskedLM.php │ │ ├── BertForQuestionAnswering.php │ │ ├── BertForSequenceClassification.php │ │ ├── BertForTokenClassification.php │ │ ├── BertModel.php │ │ ├── BertPretrainedModel.php │ │ ├── CLIPModel.php │ │ ├── CLIPPretrainedModel.php │ │ ├── CLIPVisionModelWithProjection.php │ │ ├── CodeGenForCausalLM.php │ │ ├── CodeGenModel.php │ │ ├── CodeGenPretrainedModel.php │ │ ├── DebertaForMaskedLM.php │ │ ├── DebertaForQuestionAnswering.php │ │ ├── 
DebertaForSequenceClassification.php │ │ ├── DebertaForTokenClassification.php │ │ ├── DebertaModel.php │ │ ├── DebertaPretrainedModel.php │ │ ├── DebertaV2ForMaskedLM.php │ │ ├── DebertaV2ForQuestionAnswering.php │ │ ├── DebertaV2ForSequenceClassification.php │ │ ├── DebertaV2ForTokenClassification.php │ │ ├── DebertaV2Model.php │ │ ├── DebertaV2PretrainedModel.php │ │ ├── DeiTForImageClassification.php │ │ ├── DeiTModel.php │ │ ├── DeiTPretrainedModel.php │ │ ├── DetrForObjectDetection.php │ │ ├── DetrForSegmentation.php │ │ ├── DetrModel.php │ │ ├── DetrPretrainedModel.php │ │ ├── DistilBertForMaskedLM.php │ │ ├── DistilBertForQuestionAnswering.php │ │ ├── DistilBertForSequenceClassification.php │ │ ├── DistilBertModel.php │ │ ├── GPT2LMHeadModel.php │ │ ├── GPT2Model.php │ │ ├── GPT2PretrainedModel.php │ │ ├── GPTBigCodeForCausalLM.php │ │ ├── GPTBigCodeModel.php │ │ ├── GPTBigCodePretrainedModel.php │ │ ├── GPTJForCausalLM.php │ │ ├── GPTJModel.php │ │ ├── GPTJPretrainedModel.php │ │ ├── LlamaForCausalLM.php │ │ ├── LlamaModel.php │ │ ├── LlamaPretrainedModel.php │ │ ├── M2M100ForConditionalGeneration.php │ │ ├── M2M100Model.php │ │ ├── M2M100PretrainedModel.php │ │ ├── MobileBertForMaskedLM.php │ │ ├── MobileBertForQuestionAnswering.php │ │ ├── MobileBertForSequenceClassification.php │ │ ├── MobileBertModel.php │ │ ├── MobileBertPretrainedModel.php │ │ ├── OwlViTForObjectDetection.php │ │ ├── OwlViTModel.php │ │ ├── OwlViTPretrainedModel.php │ │ ├── Owlv2ForObjectDetection.php │ │ ├── Owlv2Model.php │ │ ├── Owlv2PretrainedModel.php │ │ ├── PretrainedModel.php │ │ ├── Qwen2ForCausalLM.php │ │ ├── Qwen2Model.php │ │ ├── Qwen2PreTrainedModel.php │ │ ├── RoFormerForMaskedLM.php │ │ ├── RoFormerForQuestionAnswering.php │ │ ├── RoFormerForSequenceClassification.php │ │ ├── RoFormerForTokenClassification.php │ │ ├── RoFormerModel.php │ │ ├── RoFormerPretrainedModel.php │ │ ├── RobertaForMaskedLM.php │ │ ├── RobertaForQuestionAnswering.php │ │ ├── 
RobertaForSequenceClassification.php │ │ ├── RobertaForTokenClassification.php │ │ ├── RobertaModel.php │ │ ├── RobertaPretrainedModel.php │ │ ├── SiglipModel.php │ │ ├── SiglipPretrainedModel.php │ │ ├── SiglipTextModel.php │ │ ├── SiglipVisionModel.php │ │ ├── Swin2SRForImageSuperResolution.php │ │ ├── Swin2SRModel.php │ │ ├── Swin2SRPretrainedModel.php │ │ ├── T5ForConditionalGeneration.php │ │ ├── T5Model.php │ │ ├── T5PretrainedModel.php │ │ ├── TrOCRForCausalLM.php │ │ ├── TrOCRPretrainedModel.php │ │ ├── ViTForImageClassification.php │ │ ├── ViTModel.php │ │ ├── ViTPretrainedModel.php │ │ ├── VisionEncoderDecoderModel.php │ │ ├── Wav2Vec2ForAudioFrameClassification.php │ │ ├── Wav2Vec2ForCTC.php │ │ ├── Wav2Vec2ForSequenceClassification.php │ │ ├── Wav2Vec2Model.php │ │ ├── Wav2Vec2PretrainedModel.php │ │ ├── WhisperForConditionalGeneration.php │ │ ├── WhisperModel.php │ │ ├── WhisperPretrainedModel.php │ │ ├── YolosForObjectDetection.php │ │ ├── YolosModel.php │ │ └── YolosPretrainedModel.php ├── Normalizers │ ├── BertNormalizer.php │ ├── Lowercase.php │ ├── NFC.php │ ├── NFKC.php │ ├── NFKD.php │ ├── Normalizer.php │ ├── NormalizerSequence.php │ ├── Precompiled.php │ ├── Prepend.php │ ├── Replace.php │ ├── StripAccents.php │ └── StripNormalizer.php ├── Pipelines │ ├── AudioClassificationPipeline.php │ ├── AutomaticSpeechRecognitionPipeline.php │ ├── FeatureExtractionPipeline.php │ ├── FillMaskPipeline.php │ ├── ImageClassificationPipeline.php │ ├── ImageFeatureExtractionPipeline.php │ ├── ImageToImagePipeline.php │ ├── ImageToTextPipeline.php │ ├── ObjectDetectionPipeline.php │ ├── Pipeline.php │ ├── QuestionAnsweringPipeline.php │ ├── SummarizationPipeline.php │ ├── Task.php │ ├── Text2TextGenerationPipeline.php │ ├── TextClassificationPipeline.php │ ├── TextGenerationPipeline.php │ ├── TokenClassificationPipeline.php │ ├── TranslationPipeline.php │ ├── ZeroShotClassificationPipeline.php │ ├── ZeroShotImageClassificationPipeline.php │ └── 
ZeroShotObjectDetectionPipeline.php ├── PostProcessors │ ├── BertProcessing.php │ ├── ByteLevelPostProcessor.php │ ├── PostProcessedOutput.php │ ├── PostProcessor.php │ ├── PostProcessorSequence.php │ ├── RobertaProcessing.php │ └── TemplateProcessing.php ├── PreTokenizers │ ├── BertPreTokenizer.php │ ├── ByteLevelPreTokenizer.php │ ├── DigitsPreTokenizer.php │ ├── MetaspacePreTokenizer.php │ ├── PreTokenizer.php │ ├── PreTokenizerSequence.php │ ├── PunctuationPreTokenizer.php │ ├── ReplacePreTokenizer.php │ ├── SplitPreTokenizer.php │ ├── WhitespacePreTokenizer.php │ └── WhitespaceSplit.php ├── PreTrainedTokenizers │ ├── AlbertTokenizer.php │ ├── AutoTokenizer.php │ ├── BartTokenizer.php │ ├── BertTokenizer.php │ ├── BlenderbotSmallTokenizer.php │ ├── BlenderbotTokenizer.php │ ├── BloomTokenizer.php │ ├── CLIPTokenizer.php │ ├── CamembertTokenizer.php │ ├── CodeGenTokenizer.php │ ├── CodeLlamaTokenizer.php │ ├── CohereTokenizer.php │ ├── ConvBertTokenizer.php │ ├── DebertaTokenizer.php │ ├── DebertaV2Tokenizer.php │ ├── DistilBertTokenizer.php │ ├── ElectraTokenizer.php │ ├── EsmTokenizer.php │ ├── FalconTokenizer.php │ ├── GPT2Tokenizer.php │ ├── GPTNeoXTokenizer.php │ ├── GemmaTokenizer.php │ ├── Grok1Tokenizer.php │ ├── HerbertTokenizer.php │ ├── LlamaTokenizer.php │ ├── M2M100Tokenizer.php │ ├── MBart50Tokenizer.php │ ├── MBartTokenizer.php │ ├── MPNetTokenizer.php │ ├── MobileBertTokenizer.php │ ├── NllbTokenizer.php │ ├── NougatTokenizer.php │ ├── PreTrainedTokenizer.php │ ├── Qwen2Tokenizer.php │ ├── RoFormerTokenizer.php │ ├── RobertaTokenizer.php │ ├── SiglipTokenizer.php │ ├── SpeechT5Tokenizer.php │ ├── SqueezeBertTokenizer.php │ ├── T5Tokenizer.php │ ├── VitsTokenizer.php │ ├── Wav2Vec2CTCTokenizer.php │ ├── WhisperTokenizer.php │ ├── XLMRobertaTokenizer.php │ └── XLMTokenizer.php ├── Processors │ ├── AutoProcessor.php │ ├── OwlViTProcessor.php │ ├── Processor.php │ ├── Wav2Vec2ProcessorWithLM.php │ └── WhisperProcessor.php ├── Tensor │ ├── 
MatrixOperator.php │ ├── OpenBLASFactory.php │ ├── Tensor.php │ ├── TensorBuffer.php │ ├── TensorBufferFactory.php │ └── TensorService.php ├── Tokenizers │ ├── AddedToken.php │ ├── BPEModel.php │ ├── BPENode.php │ ├── LegacyModel.php │ ├── TokenizerModel.php │ ├── UnigramModel.php │ └── WordPieceModel.php ├── Transformers.php └── Utils │ ├── Audio.php │ ├── AutoConfig.php │ ├── Downloader.php │ ├── GenerationConfig.php │ ├── Helpers.php │ ├── Hub.php │ ├── Image.php │ ├── ImageDriver.php │ ├── InferenceSession.php │ ├── LibsChecker.php │ ├── Math.php │ ├── Resample.php │ └── StreamLogger.php └── tests ├── Expectations.php ├── Pest.php ├── PipelineTest.php ├── Utils ├── HubTest.php └── StreamLoggerTest.php ├── tensors ├── TensorBufferTest.php └── TensorTest.php └── tokenizers ├── Datasets.php ├── TokenizersTest.php ├── dataset-regular.json └── dataset-templates.json /.github/ISSUE_TEMPLATE/2_feature_request.yml: -------------------------------------------------------------------------------- 1 | name: " Feature request" 2 | description: Submit a proposal/request for a new Transformers PHP feature 3 | labels: [ "enhancement" ] 4 | body: 5 | - type: dropdown 6 | id: feature-type 7 | attributes: 8 | label: Type of feature request 9 | description: "What kind of feature are you requesting?" 10 | options: 11 | - "🌟New Model" 12 | - "🔧New Pipeline" 13 | - "🚀Enhancement" 14 | - "📦Other (please specify)" 15 | validations: 16 | required: true 17 | 18 | - type: textarea 19 | id: feature-description 20 | validations: 21 | required: true 22 | attributes: 23 | label: Feature description 24 | description: | 25 | Please provide a clear and concise description of the feature you are requesting. If the feature is related to a new model or pipeline, include details like its functionality and purpose. 26 | For enhancements, describe the desired change and its benefits. 
27 | 28 | - type: textarea 29 | id: motivation 30 | validations: 31 | required: true 32 | attributes: 33 | label: Motivation 34 | description: | 35 | Please outline the motivation for the proposal. Why is it important that we add this feature? What is your intended use case? 36 | 37 | - type: textarea 38 | id: contribution 39 | validations: 40 | required: true 41 | attributes: 42 | label: Your contribution 43 | description: | 44 | Is there any way that you could help, e.g. by submitting a PR? -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/3_question.yml: -------------------------------------------------------------------------------- 1 | name: "❓ Question about Transformers PHP" 2 | description: Ask your questions about using Transformers PHP 3 | labels: [ "question" ] 4 | body: 5 | - type: textarea 6 | id: question 7 | validations: 8 | required: true 9 | attributes: 10 | label: Your question 11 | description: Please clearly state your question about using Transformers PHP. The more details you provide, the better we can assist you. 12 | placeholder: "For example, I'm having trouble understanding how to use the [Model Name] model for sentiment analysis. Can you please provide some guidance?" 13 | 14 | - type: textarea 15 | id: context 16 | validations: 17 | required: false # Optional field 18 | attributes: 19 | label: Context (optional) 20 | description: If necessary, provide additional context about your question. This could include - The specific functionality you're trying to achieve, any code snippets you're working with, or error messages you're encountering (if applicable). 21 | 22 | - type: input 23 | id: reference 24 | validations: 25 | required: false # Optional field 26 | attributes: 27 | label: Reference (optional) 28 | description: If your question relates to specific documentation or code examples, please provide a link here. 
29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | version: 2.1 3 | contact_links: 4 | - name: Documentation 5 | url: https://codewithkyrian.github.io/transformers-php 6 | about: Read the Transformers PHP documentation -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | ### What: 6 | 7 | - [ ] Bug Fix 8 | - [ ] New Feature 9 | 10 | ### Description: 11 | 12 | 13 | 14 | ### Related: 15 | 16 | 17 | -------------------------------------------------------------------------------- /.github/workflows/changelog.yml: -------------------------------------------------------------------------------- 1 | name: "Update Changelog" 2 | 3 | on: 4 | release: 5 | types: [prereleased, released] 6 | 7 | permissions: 8 | contents: write 9 | 10 | jobs: 11 | update: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | with: 18 | ref: main 19 | 20 | - name: Update Changelog 21 | uses: stefanzweifel/changelog-updater-action@v1 22 | with: 23 | latest-version: ${{ github.event.release.name }} 24 | release-notes: ${{ github.event.release.body }} 25 | 26 | - name: Commit updated CHANGELOG 27 | uses: stefanzweifel/git-auto-commit-action@v5 28 | with: 29 | branch: main 30 | commit_message: Update CHANGELOG 31 | file_pattern: CHANGELOG.md -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Docs site to Pages 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | 7 | workflow_dispatch: 8 | 9 | permissions: 10 | contents: read 11 | pages: write 12 | id-token: 
write 13 | 14 | concurrency: 15 | group: pages 16 | cancel-in-progress: false 17 | 18 | jobs: 19 | 20 | build: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - name: Checkout 24 | uses: actions/checkout@v4 25 | with: 26 | fetch-depth: 0 # Not needed if lastUpdated is not enabled 27 | 28 | - name: Setup Bun 29 | uses: oven-sh/setup-bun@v1 # Uncomment this if you're using Bun 30 | 31 | - name: Setup Pages 32 | uses: actions/configure-pages@v4 33 | 34 | - name: Install dependencies 35 | working-directory: docs 36 | run: bun install 37 | 38 | - name: Build with VitePress 39 | working-directory: docs 40 | run: bun run docs:build 41 | 42 | - name: Upload artifact 43 | uses: actions/upload-pages-artifact@v3 44 | with: 45 | path: docs/.vitepress/dist 46 | 47 | deploy: 48 | environment: 49 | name: github-pages 50 | url: ${{ steps.deployment.outputs.page_url }} 51 | needs: build 52 | runs-on: ubuntu-latest 53 | name: Deploy 54 | steps: 55 | - name: Deploy to GitHub Pages 56 | id: deployment 57 | uses: actions/deploy-pages@v4 -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Build and Release Libraries 2 | 3 | permissions: 4 | contents: write 5 | packages: read 6 | 7 | on: 8 | release: 9 | types: 10 | - published 11 | 12 | workflow_dispatch: 13 | inputs: 14 | tag: 15 | description: 'Release Tag' 16 | required: true 17 | 18 | 19 | jobs: 20 | add-libs: 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: Log in to GHCR 25 | uses: docker/login-action@v3 26 | with: 27 | registry: ghcr.io 28 | username: ${{ github.actor }} 29 | password: ${{ secrets.GITHUB_TOKEN }} 30 | 31 | - name: Build Libraries 32 | run: | 33 | TAG=${{ startsWith(github.ref, 'refs/tags/') && github.ref_name || github.event.inputs.tag }} 34 | docker run --rm -v ./libs:/libs -e TAG=$TAG ghcr.io/codewithkyrian/transformers-php:latest 35 | ls libs 36 | 37 | - 
name: Add Libraries to Release 38 | uses: softprops/action-gh-release@v2 39 | with: 40 | files: | 41 | libs/* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .phpunit.cache 2 | .phpunit.result.cache 3 | .php-cs-fixer.cache 4 | .php-cs-fixer.php 5 | 6 | composer.lock 7 | /vendor/ 8 | 9 | .DS_Store 10 | Thumbs.db 11 | 12 | *.swp 13 | *.swo 14 | playground/* 15 | 16 | .idea 17 | .fleet 18 | .vscode 19 | 20 | .transformers-cache/* 21 | tests/models/* 22 | dist -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.5.3 -------------------------------------------------------------------------------- /bin/transformers: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | setName('Transformers PHP CLI'); 14 | 15 | $application->add(new Codewithkyrian\Transformers\Commands\InstallCommand()); 16 | $application->add(new Codewithkyrian\Transformers\Commands\DownloadModelCommand()); 17 | 18 | $application->run(); 19 | } catch (Exception $e) { 20 | echo $e->getMessage(); 21 | exit(1); 22 | } -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | ### OSX ### 2 | # General 3 | .DS_Store 4 | .AppleDouble 5 | .LSOverride 6 | 7 | # Thumbnails 8 | ._* 9 | 10 | 11 | # Files that might appear in the root of a volume 12 | .DocumentRevisions-V100 13 | .fseventsd 14 | .Spotlight-V100 15 | .TemporaryItems 16 | .Trashes 17 | .VolumeIcon.icns 18 | .com.apple.timemachine.donotpresent 19 | .idea 20 | 21 | # Directories potentially created on remote AFP share 22 | .AppleDB 23 | .AppleDesktop 24 | Network Trash Folder 25 | Temporary Items 26 | .apdisk 27 | 28 | ### 
Node ### 29 | # Logs 30 | logs 31 | *.log 32 | npm-debug.log* 33 | yarn-debug.log* 34 | yarn-error.log* 35 | lerna-debug.log* 36 | 37 | # Dependency directories 38 | node_modules/ 39 | jspm_packages/ 40 | 41 | # dotenv environment variables file 42 | .env 43 | .env.test 44 | 45 | # vitepress build output 46 | .vitepress/dist 47 | .vitepress/cache -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # TransformersPHP Documentation 2 | 3 | Welcome to the official documentation for TransformersPHP. You can find the online version of this documentation 4 | at [https://codewithkyrian.github.io/transformers-docs/](https://codewithkyrian.github.io/transformers-docs/). 5 | 6 | ## Contributing 7 | 8 | If you would like to contribute to the documentation, create a pull request with your changes. The documentation is 9 | written in Markdown so it should be easy to understand and contribute to. 
-------------------------------------------------------------------------------- /docs/bun.lockb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CodeWithKyrian/transformers-php/6609377bb44275d8a2c8936ec30d62e430f836de/docs/bun.lockb -------------------------------------------------------------------------------- /docs/images/detection-example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CodeWithKyrian/transformers-php/6609377bb44275d8a2c8936ec30d62e430f836de/docs/images/detection-example.jpg -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "devDependencies": { 3 | "vitepress": "^1.0.0-rc.45" 4 | }, 5 | "scripts": { 6 | "docs:dev": "vitepress dev", 7 | "docs:build": "vitepress build", 8 | "docs:preview": "vitepress preview" 9 | } 10 | } -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | vendor 2 | .transformers-cache/* 3 | composer.lock 4 | paddleocr -------------------------------------------------------------------------------- /examples/bootstrap.php: -------------------------------------------------------------------------------- 1 | setCacheDir('/Users/Kyrian/.transformers') 13 | ->setImageDriver(ImageDriver::VIPS) 14 | ->setLogger(new StreamLogger(STDOUT)); 15 | -------------------------------------------------------------------------------- /examples/composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "kyrian/examples", 3 | "autoload": { 4 | "psr-4": { 5 | "Kyrian\\Examples\\": "/" 6 | } 7 | }, 8 | "authors": [ 9 | { 10 | "name": "Kyrian Obikwelu", 11 | "email": 
"koshnawaza@gmail.com" 12 | } 13 | ], 14 | "require": { 15 | "php": "^8.1", 16 | "symfony/console": "^7.0", 17 | "codewithkyrian/transformers": "*" 18 | }, 19 | "require-dev": { 20 | "symfony/var-dumper": "^7.0" 21 | }, 22 | "minimum-stability": "dev", 23 | "repositories": [ 24 | { 25 | "type": "path", 26 | "url": "../" 27 | } 28 | ], 29 | "config": { 30 | "allow-plugins": { 31 | "codewithkyrian/transformers-libraries-downloader": true, 32 | "codewithkyrian/transformers-libsloader": true 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /examples/misc/background-removal.php: -------------------------------------------------------------------------------- 1 | $pixelValues] = $processor($image); 22 | 23 | ['output' => $output] = $model(['input' => $pixelValues]); 24 | // 25 | $mask = Image::fromTensor($output[0]->multiply(255))->resize($image->width(), $image->height()); 26 | // 27 | $mask->save($fileName . '-mask.png'); 28 | // 29 | $maskedImage = $image->applyMask($mask); 30 | // 31 | //$maskedImage->save($fileName . 
'-masked.png'); -------------------------------------------------------------------------------- /examples/misc/general-test.php: -------------------------------------------------------------------------------- 1 | setImageDriver($imageDriver) 21 | ->apply(); 22 | 23 | $url = __DIR__.'/../images/kyrian-cartoon.jpeg'; 24 | $tensor = Image::read($url) 25 | ->rgb() 26 | ->thumbnail(101, 101) 27 | ->toTensor(); 28 | 29 | dump("$imageDriver->name (toTensor) : ".timeUsage(true)); 30 | 31 | return $tensor; 32 | } 33 | 34 | function fromTensorTest(ImageDriver $imageDriver, Tensor $tensor): Image 35 | { 36 | Transformers::setup() 37 | ->setImageDriver($imageDriver) 38 | ->apply(); 39 | 40 | $image = Image::fromTensor($tensor); 41 | 42 | dump("$imageDriver->name (fromTensor) : ".timeUsage(true)); 43 | 44 | return $image; 45 | } 46 | 47 | 48 | // Run the test 49 | dump("------------ toTensor ------------"); 50 | $tensor = toTensorTest(ImageDriver::IMAGICK); 51 | $tensor = toTensorTest(ImageDriver::GD); 52 | $tensor = toTensorTest(ImageDriver::VIPS); 53 | 54 | 55 | dump("------------ fromTensor ------------"); 56 | $image = fromTensorTest(ImageDriver::IMAGICK, $tensor); 57 | $image = fromTensorTest(ImageDriver::GD, $tensor); 58 | $image = fromTensorTest(ImageDriver::VIPS, $tensor); 59 | 60 | // Save the image 61 | //$image->save('images/images/kyrian-cartoon-converted.jpeg'); 62 | -------------------------------------------------------------------------------- /examples/pipelines/asr.php: -------------------------------------------------------------------------------- 1 | onStream(fn($text) => print($text)); 30 | 31 | 32 | $output = $transcriber($audioUrl, 33 | maxNewTokens: 256, 34 | chunkLengthSecs: 24, 35 | streamer: $streamer, 36 | ); 37 | 38 | dd($output, timeUsage(), memoryUsage()); 39 | -------------------------------------------------------------------------------- /examples/pipelines/audio-classification.php: 
-------------------------------------------------------------------------------- 1 | developer.'); 16 | 17 | 18 | dd($result); 19 | 20 | -------------------------------------------------------------------------------- /examples/pipelines/image-classification.php: -------------------------------------------------------------------------------- 1 | tokenizer); 16 | 17 | $url = __DIR__ . '/../images/beach.png'; 18 | //$url = __DIR__. '/../images/handwriting.jpg'; 19 | //$url = __DIR__. '/../images/handwriting3.png'; 20 | //$url = __DIR__ . '/../images/handwriting4.jpeg'; 21 | 22 | $output = $captioner($url); 23 | 24 | dd($output, timeUsage(), memoryUsage()); -------------------------------------------------------------------------------- /examples/pipelines/object-detection.php: -------------------------------------------------------------------------------- 1 | drawRectangle($box['xmin'], $box['ymin'], $box['xmax'], $box['ymax'], '0099FF', thickness: 2); 27 | // $image = $image->drawText($item['label'], $box['xmin'], max($box['ymin'] - 5, 0), '/Users/Kyrian/Library/Fonts/JosefinSans-Bold.ttf', 14, '0099FF'); 28 | //} 29 | // 30 | //$image->save(__DIR__ . 
'/../images/cats-detection.jpg'); 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/pipelines/question-answering.php: -------------------------------------------------------------------------------- 1 | shouldSkipPrompt(); 21 | 22 | $messages = [ 23 | ['role' => 'system', 'content' => 'You are a helpful assistant.'], 24 | ['role' => 'user', 'content' => 'What is diffusion?'], 25 | ]; 26 | 27 | $input = $generator->tokenizer->applyChatTemplate($messages, addGenerationPrompt: true, tokenize: false); 28 | 29 | $output = $generator($input, 30 | streamer: $streamer, 31 | maxNewTokens: 256, 32 | doSample: true, 33 | returnFullText: false, 34 | // temperature: 0.7, 35 | // repetitionPenalty: 1.3, 36 | // earlyStopping: true 37 | ); 38 | 39 | //$generator = pipeline('text-generation', 'Xenova/codegen-350M-mono'); 40 | //$streamer = TextStreamer::make(); 41 | 42 | //$output = $generator( 43 | // 'def fib(n):', 44 | // streamer: $streamer, 45 | // maxNewTokens: 100, 46 | // doSample: true, 47 | // returnFullText: true, 48 | //); 49 | 50 | dd($output[0]['generated_text'], timeUsage(), memoryUsage()); 51 | -------------------------------------------------------------------------------- /examples/pipelines/text2text-generation.php: -------------------------------------------------------------------------------- 1 | 'user', 'content' => 'Hello!'], 13 | ['role' => 'assistant', 'content' => 'Hi! 
How are you?'], 14 | ['role' => 'user', 'content' => 'I am doing great.'], 15 | ['role' => 'assistant', 'content' => 'That is great to hear.'], 16 | ]; 17 | 18 | $text = $tokenizer->applyChatTemplate($messages, addGenerationPrompt: true, tokenize: false); 19 | 20 | dd($text); 21 | -------------------------------------------------------------------------------- /libs/.gitignore: -------------------------------------------------------------------------------- 1 | /* 2 | !VERSIONS -------------------------------------------------------------------------------- /phpunit.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | ./tests 10 | 11 | 12 | 13 | 14 | ./app 15 | ./src 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime<1.16.0 2 | transformers[torch]==4.33.2 3 | optimum==1.13.2 4 | tqdm 5 | onnx==1.13 6 | -------------------------------------------------------------------------------- /src/DataStructures/CharTrieNode.php: -------------------------------------------------------------------------------- 1 | children[$ch] ??= CharTrieNode::default(); 32 | 33 | return $this->children[$ch]; 34 | } 35 | } -------------------------------------------------------------------------------- /src/DataStructures/TokenLatticeNode.php: -------------------------------------------------------------------------------- 1 | tokenId, $this->nodeId, $this->pos, $this->length, $this->score); 40 | $n->prev = $this->prev; 41 | $n->backtraceScore = $this->backtraceScore; 42 | return $n; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/Decoders/BPEDecoder.php: -------------------------------------------------------------------------------- 1 | suffix = $config['suffix']; 20 | } 21 | 22 | protected function 
decodeChain(array $tokens): array 23 | { 24 | return array_map(function (string $token, int $i) use ($tokens) { 25 | return str_replace($this->suffix, ($i === count($tokens) - 1) ? '' : ' ', $token); 26 | }, $tokens, array_keys($tokens)); 27 | } 28 | } -------------------------------------------------------------------------------- /src/Decoders/DecoderSequence.php: -------------------------------------------------------------------------------- 1 | decoders = array_map( 23 | fn(array $decoderConfig) => Decoder::fromConfig($decoderConfig), 24 | $config['decoders'] 25 | ); 26 | } 27 | 28 | protected function decodeChain(array $tokens): array 29 | { 30 | return array_reduce( 31 | $this->decoders, 32 | fn(array $tokens, Decoder $decoder) => $decoder->decodeChain($tokens), 33 | $tokens 34 | ); 35 | } 36 | } -------------------------------------------------------------------------------- /src/Decoders/FuseDecoder.php: -------------------------------------------------------------------------------- 1 | addPrefixSpace = $config['add_prefix_space'] ?? false; 31 | $this->replacement = $config['replacement'] ?? ''; 32 | } 33 | 34 | protected function decodeChain(array $tokens): array 35 | { 36 | $result = []; 37 | 38 | foreach ($tokens as $i => $token) { 39 | $normalized = str_replace($this->replacement, ' ', $token); 40 | 41 | if ($this->addPrefixSpace && $i == 0 && str_starts_with($normalized, ' ')) { 42 | $normalized = substr($normalized, 1); 43 | } 44 | 45 | $result[] = $normalized; 46 | } 47 | 48 | return $result; 49 | } 50 | } -------------------------------------------------------------------------------- /src/Decoders/ReplaceDecoder.php: -------------------------------------------------------------------------------- 1 | config['pattern'] ?? null; 21 | 22 | if ($pattern === null) { 23 | return $tokens; 24 | } 25 | 26 | $regex = $pattern['Regex'] ?? null; 27 | $string = $pattern['String'] ?? null; 28 | $replacement = $this->config['content'] ?? 
''; 29 | 30 | return array_map(function ($token) use ($regex, $string, $replacement) { 31 | if ($regex !== null) { 32 | return preg_replace("/{$regex}/u", $replacement, (string)$token); 33 | } 34 | if ($string !== null) { 35 | return str_replace($string, $replacement, (string)$token); 36 | } 37 | return $token; 38 | }, $tokens); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/Decoders/StripDecoder.php: -------------------------------------------------------------------------------- 1 | content = $config['content']; 22 | $this->start = $config['start']; 23 | $this->stop = $config['stop']; 24 | } 25 | 26 | /* Removes up to `start` leading and up to `stop` trailing occurrences of the `content` character from every token. */ protected function decodeChain(array $tokens): array 27 | { 28 | return array_map(function ($token) { 29 | $startCut = 0; 30 | for ($i = 0; $i < $this->start; ++$i) { 31 | $char = mb_substr($token, $i, 1); 32 | if ($char === $this->content) { 33 | $startCut = $i + 1; 34 | continue; 35 | } else { 36 | break; 37 | } 38 | } 39 | 40 | $stopCut = mb_strlen($token); 41 | for ($i = 0; $i < $this->stop; ++$i) { 42 | $index = mb_strlen($token) - $i - 1; 43 | /* Compare the multibyte character itself (`??` binds looser than `===`, so the old test was always truthy) and never cut into the part already removed from the start. */ if ($index >= $startCut && mb_substr($token, $index, 1) === $this->content) { 44 | $stopCut = $index; 45 | continue; 46 | } else { 47 | break; 48 | } 49 | } 50 | 51 | return mb_substr($token, $startCut, $stopCut - $startCut); 52 | }, $tokens); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/Decoders/VitsDecoder.php: -------------------------------------------------------------------------------- 1 | cleanup = $config['cleanup']; 19 | } 20 | 21 | protected function decodeChain(array $tokens): array 22 | { 23 | $decodedTokens = []; 24 | foreach ($tokens as $i => $token) { 25 | if ($i !== 0) { 26 | if (str_starts_with((string)$token, $this->config['prefix'])) { 27 | // NOTE: Strip only the leading prefix; str_replace would remove every occurrence inside the token 28 | $token = substr((string)$token, strlen($this->config['prefix'])); 29 | } else { 30 | $token = ' ' . 
$token; 31 | } 32 | } 33 | if ($this->cleanup) { 34 | $token = TokenizerModel::cleanUpTokenization($token); 35 | } 36 | 37 | $decodedTokens[] = $token; 38 | } 39 | 40 | return $decodedTokens; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/Exceptions/HubException.php: -------------------------------------------------------------------------------- 1 | FFI::cdef( 32 | "\nsize_t mbstowcs(void *wcstr, const char *mbstr, size_t count);", 33 | 'msvcrt.dll' 34 | ), 35 | default => FFI::cdef() 36 | }; 37 | } 38 | 39 | return self::$ffi; 40 | } 41 | 42 | public static function new($type, bool $owned = true, bool $persistent = false): ?CData 43 | { 44 | return self::ffi()->new($type, $owned, $persistent); 45 | } 46 | 47 | public static function mbStringToWcString(CData $wcStr, string $mbStr, int $count): CData 48 | { 49 | $length = self::ffi()->mbstowcs($wcStr, $mbStr, $count); 50 | 51 | if ($length != strlen($mbStr)) { 52 | throw new RuntimeException('Expected mbstowcs to return '.strlen($mbStr).", got $length"); 53 | } 54 | 55 | return $wcStr; 56 | } 57 | 58 | public static function cstring($str): CData 59 | { 60 | $bytes = strlen($str) + 1; 61 | // TODO fix? 62 | $ptr = self::new("char[$bytes]", owned: false); 63 | FFI::memcpy($ptr, $str, $bytes - 1); 64 | $ptr[$bytes - 1] = "\0"; 65 | 66 | return $ptr; 67 | } 68 | } -------------------------------------------------------------------------------- /src/FeatureExtractors/FeatureExtractor.php: -------------------------------------------------------------------------------- 1 | shape(); 25 | } else { 26 | [$imageHeight, $imageWidth, $imageChannels] = $imageTensor->shape(); 27 | } 28 | 29 | // NOTE: For Swin2SR models, the original python implementation adds padding even when the image's width/height is already 30 | // a multiple of `pad_size`. However, this is most likely a bug (PR: https://github.com/mv-lab/swin2sr/pull/19). 
31 | // For this reason, we only add padding when the image's width/height is not a multiple of `pad_size`. 32 | $padSize = [ 33 | 'width' => $imageWidth + ($padSize - $imageWidth % $padSize) % $padSize, 34 | 'height' => $imageHeight + ($padSize - $imageHeight % $padSize) % $padSize, 35 | ]; 36 | 37 | return parent::padImage($imageTensor, $padSize, $tensorFormat,'symmetric', false, -1); 38 | } 39 | } -------------------------------------------------------------------------------- /src/FeatureExtractors/ViTFeatureExtractor.php: -------------------------------------------------------------------------------- 1 | config['do_normalize']) 22 | { 23 | $mean = $waveform->mean(); 24 | 25 | //calculate the variance 26 | // $variance = $waveform->add(-$mean)->pow(2)->mean(); 27 | $variance = 0; 28 | for ($i = 0; $i < $waveform->size(); $i++) { 29 | $variance += pow($waveform[$i] - $mean, 2); 30 | } 31 | $variance /= $waveform->size(); 32 | 33 | //normalize the waveform 34 | $waveform = $waveform->add(-$mean)->multiply(1.0 / sqrt($variance + 1e-7)); 35 | } 36 | 37 | $shape = [1, $waveform->size()]; 38 | 39 | return [ 40 | 'input_values' => $waveform->reshape($shape), 41 | 'attention_mask' => Tensor::ones($shape, dtype: Tensor::int64) 42 | ]; 43 | } 44 | } -------------------------------------------------------------------------------- /src/Generation/AggregationStrategy.php: -------------------------------------------------------------------------------- 1 | badWordsIds = $badWordsIds; 19 | $this->eosTokenId = is_array($eosTokenId) ? 
$eosTokenId : [$eosTokenId]; 20 | } 21 | 22 | /** 23 | * @inheritDoc 24 | */ 25 | public function __invoke(array $inputIds, Tensor $logits): Tensor 26 | { 27 | foreach ($this->badWordsIds as $badWordIds) { 28 | // The last id of a bad word is banned only when all of its preceding ids are the most recent input ids; an empty entry or a prefix longer than the input can never match 29 | $prefixLength = count($badWordIds) - 1; $mark = $prefixLength >= 0 && $prefixLength <= count($inputIds); 30 | 31 | // For each bad word in the list, if the current sequence of input ids ends with this sequence (excluding the last), 32 | // then we set the logits of the last bad word id to -Infinity. 33 | for ($i = 1; $mark && $i <= $prefixLength; ++$i) { 34 | 35 | if ($badWordIds[$prefixLength - $i] !== array_slice($inputIds, -$i, 1)[0]) { 36 | $mark = false; 37 | break; 38 | } 39 | } 40 | if ($mark) { 41 | $lastBadWordIdIndex = array_pop($badWordIds); 42 | $logits->buffer()[$lastBadWordIdIndex] = -INF; 43 | } 44 | } 45 | 46 | return $logits; 47 | } 48 | } -------------------------------------------------------------------------------- /src/Generation/LogitsProcessors/ForceTokensLogitsProcessor.php: -------------------------------------------------------------------------------- 1 | forceTokenMap = array_column($forcedDecoderIds, 1, 0); 20 | } 21 | 22 | /** 23 | * Apply the processor to the input logits. 24 | * 25 | * @param Tensor[] $inputIds The input IDs. 26 | * @param Tensor $logits The logits to process. 27 | * @return Tensor The processed logits. 28 | */ 29 | public function __invoke(array $inputIds, Tensor $logits): Tensor 30 | { 31 | $map = $this->forceTokenMap[count($inputIds)] ?? 
null; // Access length from inputIds 32 | 33 | /* A forced token id of 0 is valid, so test for presence rather than truthiness. */ if ($map !== null) { 34 | Tensor::mo()->la()->fill(-INF, $logits); 35 | 36 | $logits->buffer()[$map] = 0; 37 | } 38 | 39 | return $logits; 40 | } 41 | } -------------------------------------------------------------------------------- /src/Generation/LogitsProcessors/ForcedBOSTokenLogitsProcessor.php: -------------------------------------------------------------------------------- 1 | la()->fill(-INF, $logits); 29 | $logits->buffer()[$this->bosTokenId] = 0; 30 | } 31 | return $logits; 32 | } 33 | } -------------------------------------------------------------------------------- /src/Generation/LogitsProcessors/ForcedEOSTokenLogitsProcessor.php: -------------------------------------------------------------------------------- 1 | = $this->maxLength) { 25 | Tensor::mo()->la()->fill(-INF, $logits); 26 | $logits->buffer()[$this->forcedEosTokenId] = 0; 27 | } 28 | return $logits; 29 | } 30 | } -------------------------------------------------------------------------------- /src/Generation/LogitsProcessors/LogitsProcessor.php: -------------------------------------------------------------------------------- 1 | eosTokenId = [$eosTokenId]; 27 | } 28 | } 29 | 30 | /** 31 | * @inheritDoc 32 | */ 33 | public function __invoke(array $inputIds, Tensor $logits): Tensor 34 | { 35 | if (count($inputIds) < $this->minLength) { 36 | foreach ($this->eosTokenId as $id) { 37 | $logits->buffer()[$id] = -INF; 38 | } 39 | } 40 | return $logits; 41 | } 42 | } -------------------------------------------------------------------------------- /src/Generation/LogitsProcessors/MinNewTokensLengthLogitsProcessor.php: -------------------------------------------------------------------------------- 1 | eosTokenId = is_array($eosTokenId) ? 
$eosTokenId : [$eosTokenId]; 20 | } 21 | 22 | /** 23 | * @inheritDoc 24 | */ 25 | public function __invoke(array $inputIds, Tensor $logits): Tensor 26 | { 27 | $newTokensLength = count($inputIds) - $this->promptLengthToSkip; 28 | 29 | if ($newTokensLength < $this->minNewTokens) { 30 | foreach ($this->eosTokenId as $eosTokenId) { 31 | $logits->buffer()[$eosTokenId] = -INF; 32 | } 33 | } 34 | 35 | return $logits; 36 | } 37 | } -------------------------------------------------------------------------------- /src/Generation/LogitsProcessors/RepetitionPenaltyLogitsProcessor.php: -------------------------------------------------------------------------------- 1 | buffer()[$inputId] < 0) { 29 | $logits->buffer()[$inputId] *= $this->penalty; 30 | } else { 31 | $logits->buffer()[$inputId] /= $this->penalty; 32 | } 33 | } 34 | return $logits; 35 | } 36 | } -------------------------------------------------------------------------------- /src/Generation/LogitsProcessors/SuppressTokensAtBeginLogitsProcessor.php: -------------------------------------------------------------------------------- 1 | beginIndex) { 30 | foreach ($this->beginSuppressTokens as $token) { 31 | $logits->buffer()[$token] = -INF; 32 | } 33 | } 34 | 35 | return $logits; 36 | } 37 | } -------------------------------------------------------------------------------- /src/Generation/Samplers/BeamSearchSampler.php: -------------------------------------------------------------------------------- 1 | shape()[$logits->ndim() - 1]; 23 | 24 | $k = $this->generationConfig->top_k > 0 25 | ? 
min($this->generationConfig->top_k, $vocabSize) 26 | : $vocabSize; // defaults to vocab size 27 | 28 | // Get logits of nth token 29 | $logs = $this->getLogits($logits, $index); 30 | 31 | // Get top k tokens 32 | [$topLogits, $topIndices] = $logs->topk($k); 33 | 34 | // Compute softmax over logits 35 | $probabilities = $topLogits->softmax()->toArray(); 36 | 37 | $sampledResults = []; 38 | for ($i = 0; $i < $this->generationConfig->num_beams; $i++) { 39 | $sampledResults[] = [ 40 | $topIndices[$i], // token id 41 | log($probabilities[$i]), // score 42 | ]; 43 | } 44 | 45 | return $sampledResults; 46 | } 47 | } -------------------------------------------------------------------------------- /src/Generation/Samplers/GreedySampler.php: -------------------------------------------------------------------------------- 1 | getLogits($logits, $index); 24 | 25 | // Note: score is meaningless in this context, since we are performing 26 | // greedy search (p = 1 => log(p) = 0) 27 | return [ 28 | [$logs->argMax(), 0] 29 | ]; 30 | } 31 | } -------------------------------------------------------------------------------- /src/Generation/Samplers/MultinomialSampler.php: -------------------------------------------------------------------------------- 1 | shape()[$logits->ndim() - 1]; 22 | 23 | $k = $this->generationConfig->top_k > 0 24 | ? 
min($this->generationConfig->top_k, $vocabSize) 25 | : $vocabSize; // defaults to vocab size 26 | 27 | // Get logits of nth token 28 | $logs = $this->getLogits($logits, $index); 29 | 30 | // Get top k tokens 31 | [$topLogits, $topIndices] = $logs->topk($k); 32 | 33 | // Compute softmax over logits 34 | $probabilities = $topLogits->softmax()->toArray(); 35 | 36 | $sampledResults = []; 37 | 38 | for ($i = 0; $i < $this->generationConfig->num_beams; $i++) { 39 | $sampledIndex = $this->randomSelect($probabilities); 40 | 41 | $sampledResults[] = [ 42 | $topIndices[$sampledIndex], // token id 43 | log($probabilities[$sampledIndex]), // score 44 | ]; 45 | } 46 | 47 | return $sampledResults; 48 | } 49 | } -------------------------------------------------------------------------------- /src/Generation/Streamers/StdOutStreamer.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\ASTForAudioClassification::class, 12 | 'wav2vec2' => \Codewithkyrian\Transformers\Models\Pretrained\Wav2Vec2ForSequenceClassification::class, 13 | ]; 14 | 15 | const MODEL_CLASS_MAPPINGS = [ 16 | self::MODEL_CLASS_MAPPING, 17 | ]; 18 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForCTC.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\Wav2Vec2ForCTC::class, 12 | ]; 13 | 14 | const MODEL_CLASS_MAPPINGS = [ 15 | self::MODEL_CLASS_MAPPING, 16 | ]; 17 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForCausalLM.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\GPT2LMHeadModel::class, 12 | 'gptj' => \Codewithkyrian\Transformers\Models\Pretrained\GPTJForCausalLM::class, 13 | 'gpt_bigcode' => 
\Codewithkyrian\Transformers\Models\Pretrained\GPTBigCodeForCausalLM::class, 14 | 'codegen' => \Codewithkyrian\Transformers\Models\Pretrained\CodeGenForCausalLM::class, 15 | 'llama' => \Codewithkyrian\Transformers\Models\Pretrained\LlamaForCausalLM::class, 16 | 'trocr' => \Codewithkyrian\Transformers\Models\Pretrained\TrOCRForCausalLM::class, 17 | 'qwen2' => \Codewithkyrian\Transformers\Models\Pretrained\Qwen2ForCausalLM::class 18 | ]; 19 | 20 | const MODEL_CLASS_MAPPINGS = [ 21 | self::MODEL_CLASS_MAPPING, 22 | ]; 23 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForImageClassification.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\ViTForImageClassification::class, 12 | 'deit' => \Codewithkyrian\Transformers\Models\Pretrained\DeiTForImageClassification::class, 13 | ]; 14 | 15 | const MODEL_CLASS_MAPPINGS = [ 16 | self::MODEL_CLASS_MAPPING, 17 | ]; 18 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForImageFeatureExtraction.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\CLIPVisionModelWithProjection::class, 12 | 'siglip' => \Codewithkyrian\Transformers\Models\Pretrained\SiglipVisionModel::class, 13 | ]; 14 | 15 | const MODEL_CLASS_MAPPINGS = [ 16 | self::MODEL_CLASS_MAPPING, 17 | AutoModel::ENCODER_ONLY_MODEL_MAPPING, 18 | AutoModel::DECODER_ONLY_MODEL_MAPPING, 19 | ]; 20 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForImageToImage.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\Swin2SRForImageSuperResolution::class, 12 | ]; 13 | 14 | const MODEL_CLASS_MAPPINGS = [ 15 | self::MODEL_CLASS_MAPPING, 16 | ]; 17 | } 
-------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForMaskedLM.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\AlbertForMaskedLM::class, 12 | "bert" => \Codewithkyrian\Transformers\Models\Pretrained\BertForMaskedLM::class, 13 | "deberta" => \Codewithkyrian\Transformers\Models\Pretrained\DebertaForMaskedLM::class, 14 | "deberta-v2" => \Codewithkyrian\Transformers\Models\Pretrained\DebertaV2ForMaskedLM::class, 15 | "distilbert" => \Codewithkyrian\Transformers\Models\Pretrained\DistilBertForMaskedLM::class, 16 | "mobilebert" => \Codewithkyrian\Transformers\Models\Pretrained\MobileBertForMaskedLM::class, 17 | "roberta" => \Codewithkyrian\Transformers\Models\Pretrained\RobertaForMaskedLM::class, 18 | "roformer" => \Codewithkyrian\Transformers\Models\Pretrained\RoFormerForMaskedLM::class, 19 | ]; 20 | 21 | const MODEL_CLASS_MAPPINGS = [ 22 | self::MODEL_CLASS_MAPPING, 23 | ]; 24 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForObjectDetection.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\DetrForObjectDetection::class, 12 | 'yolos' => \Codewithkyrian\Transformers\Models\Pretrained\YolosForObjectDetection::class, 13 | ]; 14 | 15 | const MODEL_CLASS_MAPPINGS = [ 16 | self::MODEL_CLASS_MAPPING, 17 | ]; 18 | 19 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForQuestionAnswering.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\AlbertForQuestionAnswering::class, 12 | 'bert' => \Codewithkyrian\Transformers\Models\Pretrained\BertForQuestionAnswering::class, 13 | 'deberta' => 
\Codewithkyrian\Transformers\Models\Pretrained\DebertaForQuestionAnswering::class, 14 | 'deberta-v2' => \Codewithkyrian\Transformers\Models\Pretrained\DebertaV2ForQuestionAnswering::class, 15 | 'distilbert' => \Codewithkyrian\Transformers\Models\Pretrained\DistilBertForQuestionAnswering::class, 16 | 'mobilebert' => \Codewithkyrian\Transformers\Models\Pretrained\MobileBertForQuestionAnswering::class, 17 | 'roberta' => \Codewithkyrian\Transformers\Models\Pretrained\RobertaForQuestionAnswering::class, 18 | 'roformer' => \Codewithkyrian\Transformers\Models\Pretrained\RoFormerForQuestionAnswering::class, 19 | ]; 20 | 21 | const MODEL_CLASS_MAPPINGS = [ 22 | self::MODEL_CLASS_MAPPING, 23 | ]; 24 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForSeq2SeqLM.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\BartForConditionalGeneration::class, 11 | 't5' => \Codewithkyrian\Transformers\Models\Pretrained\T5ForConditionalGeneration::class, 12 | 'm2m_100' => \Codewithkyrian\Transformers\Models\Pretrained\M2M100ForConditionalGeneration::class, 13 | ]; 14 | 15 | const MODEL_CLASS_MAPPINGS = [ 16 | self::MODEL_CLASS_MAPPING, 17 | ]; 18 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForSequenceClassification.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\AlbertForSequenceClassification::class, 13 | 'bert' => \Codewithkyrian\Transformers\Models\Pretrained\BertForSequenceClassification::class, 14 | 'bart' => \Codewithkyrian\Transformers\Models\Pretrained\BartForSequenceClassification::class, 15 | 'deberta' => \Codewithkyrian\Transformers\Models\Pretrained\DebertaForSequenceClassification::class, 16 | 'deberta-v2' => 
\Codewithkyrian\Transformers\Models\Pretrained\DebertaV2ForSequenceClassification::class, 17 | 'distilbert' => \Codewithkyrian\Transformers\Models\Pretrained\DistilBertForSequenceClassification::class, 18 | 'mobilebert' => \Codewithkyrian\Transformers\Models\Pretrained\MobileBertForSequenceClassification::class, 19 | 'roberta' => \Codewithkyrian\Transformers\Models\Pretrained\RobertaForSequenceClassification::class, 20 | 'roformer' => \Codewithkyrian\Transformers\Models\Pretrained\RoFormerForSequenceClassification::class, 21 | ]; 22 | 23 | const MODEL_CLASS_MAPPINGS = [ 24 | self::MODEL_CLASS_MAPPING, 25 | ]; 26 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForSpeechSeq2Seq.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\WhisperForConditionalGeneration::class, 12 | ]; 13 | 14 | const MODEL_CLASS_MAPPINGS = [ 15 | self::MODEL_CLASS_MAPPING, 16 | ]; 17 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForTokenClassification.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\BertForTokenClassification::class, 12 | "deberta" => \Codewithkyrian\Transformers\Models\Pretrained\DebertaForTokenClassification::class, 13 | "deberta-v2" => \Codewithkyrian\Transformers\Models\Pretrained\DebertaV2ForTokenClassification::class, 14 | "roberta" => \Codewithkyrian\Transformers\Models\Pretrained\RobertaForTokenClassification::class, 15 | 'roformer' => \Codewithkyrian\Transformers\Models\Pretrained\RoFormerForTokenClassification::class, 16 | ]; 17 | 18 | const MODEL_CLASS_MAPPINGS = [ 19 | self::MODEL_CLASS_MAPPING, 20 | ]; 21 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForVision2Seq.php: 
-------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\VisionEncoderDecoderModel::class 12 | ]; 13 | 14 | const MODEL_CLASS_MAPPINGS = [ 15 | self::MODEL_CLASS_MAPPING, 16 | ]; 17 | } -------------------------------------------------------------------------------- /src/Models/Auto/AutoModelForZeroShotObjectDetection.php: -------------------------------------------------------------------------------- 1 | \Codewithkyrian\Transformers\Models\Pretrained\OwlViTForObjectDetection::class, 12 | 'owlv2' => \Codewithkyrian\Transformers\Models\Pretrained\Owlv2ForObjectDetection::class, 13 | ]; 14 | 15 | const MODEL_CLASS_MAPPINGS = [ 16 | self::MODEL_CLASS_MAPPING, 17 | ]; 18 | 19 | } -------------------------------------------------------------------------------- /src/Models/Output/BaseModelOutput.php: -------------------------------------------------------------------------------- 1 | numDecoderLayers = $this->config['decoder_layers']; 36 | $this->numDecoderHeads = $this->config['decoder_attention_heads']; 37 | $this->decoderDimKv = $this->config['d_model'] / $this->numDecoderHeads; 38 | 39 | $this->numEncoderLayers = $this->config['encoder_layers']; 40 | $this->numEncoderHeads = $this->config['encoder_attention_heads']; 41 | $this->encoderDimKv = $this->config['d_model'] / $this->numEncoderHeads; 42 | } 43 | } -------------------------------------------------------------------------------- /src/Models/Pretrained/BartForSequenceClassification.php: -------------------------------------------------------------------------------- 1 | config['pad_token_id'] = $this->config['eos_token_id']; 29 | $this->config->padTokenId = $this->config['eos_token_id']; 30 | 31 | $this->numHeads = $this->config['n_head']; 32 | $this->numLayers = $this->config['n_layer']; 33 | $this->dimKv = $this->config['n_embd'] / $this->numHeads; 34 | 35 | } 36 | } 
-------------------------------------------------------------------------------- /src/Models/Pretrained/DebertaForMaskedLM.php: -------------------------------------------------------------------------------- 1 | config['pad_token_id'] = $this->config['eos_token_id']; 30 | $this->config->padTokenId = $this->config['eos_token_id']; 31 | 32 | $this->numHeads = $this->config['n_head']; 33 | $this->numLayers = $this->config['n_layer']; 34 | $this->dimKv = $this->config['n_embd'] / $this->numHeads; 35 | } 36 | } -------------------------------------------------------------------------------- /src/Models/Pretrained/GPTBigCodeForCausalLM.php: -------------------------------------------------------------------------------- 1 | config['pad_token_id'] = $this->config['eos_token_id']; 30 | $this->config->padTokenId = $this->config['eos_token_id']; 31 | 32 | $this->numHeads = $this->config['n_head']; 33 | $this->numLayers = $this->config['n_layer']; 34 | $this->dimKv = $this->config['n_embd'] / $this->numHeads; 35 | } 36 | } -------------------------------------------------------------------------------- /src/Models/Pretrained/GPTJForCausalLM.php: -------------------------------------------------------------------------------- 1 | config['pad_token_id'] = $this->config['eos_token_id']; 29 | $this->config->padTokenId = $this->config['eos_token_id']; 30 | 31 | $this->numHeads = $this->config['n_head']; 32 | $this->numLayers = $this->config['n_layer']; 33 | $this->dimKv = $this->config['n_embd'] / $this->numHeads; 34 | 35 | } 36 | } -------------------------------------------------------------------------------- /src/Models/Pretrained/LlamaForCausalLM.php: -------------------------------------------------------------------------------- 1 | config['pad_token_id'] = $this->config['eos_token_id']; 34 | $this->config->padTokenId = $this->config['eos_token_id']; 35 | 36 | $this->numHeads = $this->config['num_key_value_heads'] ?? 
$this->config['num_attention_heads']; 37 | $this->numLayers = $this->config['num_hidden_layers']; 38 | $this->dimKv = $this->config['hidden_size'] / $this->config['num_attention_heads']; 39 | } 40 | } -------------------------------------------------------------------------------- /src/Models/Pretrained/M2M100ForConditionalGeneration.php: -------------------------------------------------------------------------------- 1 | numDecoderLayers = $this->config['decoder_layers']; 33 | $this->numDecoderHeads = $this->config['decoder_attention_heads']; 34 | $this->decoderDimKv = $this->config['d_model'] / $this->numDecoderHeads; 35 | 36 | $this->numEncoderLayers = $this->config['encoder_layers']; 37 | $this->numEncoderHeads = $this->config['encoder_attention_heads']; 38 | $this->encoderDimKv = $this->config['d_model'] / $this->numEncoderHeads; 39 | } 40 | 41 | } -------------------------------------------------------------------------------- /src/Models/Pretrained/M2M100Model.php: -------------------------------------------------------------------------------- 1 | config['pad_token_id'] = $this->config['eos_token_id']; 33 | $this->config->padTokenId = $this->config['eos_token_id']; 34 | 35 | $this->numHeads = $this->config['num_key_value_heads'] ?? 
$this->config['num_attention_heads']; 36 | $this->numLayers = $this->config['num_hidden_layers']; 37 | $this->dimKv = $this->config['hidden_size'] / $this->config['num_attention_heads']; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/Models/Pretrained/RoFormerForMaskedLM.php: -------------------------------------------------------------------------------- 1 | numDecoderLayers = $this->config['num_decoder_layers']; 36 | $this->numDecoderHeads = $this->config['num_heads']; 37 | $this->decoderDimKv = $this->config['d_kv']; 38 | 39 | $this->numEncoderLayers = $this->config['num_layers']; 40 | $this->numEncoderHeads = $this->config['num_heads']; 41 | $this->encoderDimKv = $this->config['d_kv']; 42 | } 43 | } -------------------------------------------------------------------------------- /src/Models/Pretrained/T5Model.php: -------------------------------------------------------------------------------- 1 | numEncoderLayers = $this->numDecoderLayers = $this->config['decoder_layers']; 33 | $this->numEncoderHeads = $this->numDecoderHeads = $this->config['decoder_attention_heads']; 34 | $this->encoderDimKv = $this->decoderDimKv = $this->config['d_model'] / $this->numDecoderHeads; 35 | } 36 | } -------------------------------------------------------------------------------- /src/Models/Pretrained/ViTForImageClassification.php: -------------------------------------------------------------------------------- 1 | toTensor(samplerate: 16000); 19 | * $inputs = $processor($audioTensor); 20 | * 21 | * // Run model with inputs 22 | * $model = AutoModel::from_pretrained('Xenova/mms-300m'); 23 | * $output = $model($inputs); 24 | * // { 25 | * // last_hidden_state: Tensor { 26 | * // shape: [ 1, 1144, 1024 ], 27 | * // dtype: 'float32', 28 | * // buffer: (1171456) [ ... 
], 29 | * // size: 1171456 30 | * // } 31 | * // } 32 | * ``` 33 | */ 34 | class Wav2Vec2Model extends Wav2Vec2PretrainedModel 35 | { 36 | 37 | } -------------------------------------------------------------------------------- /src/Models/Pretrained/Wav2Vec2PretrainedModel.php: -------------------------------------------------------------------------------- 1 | new BertNormalizer($config), 25 | 'Precompiled' => new Precompiled($config), 26 | 'Sequence' => new NormalizerSequence($config), 27 | 'Replace' => new Replace($config), 28 | 'NFC' => new NFC($config), 29 | 'NFKC' => new NFKC($config), 30 | 'NFKD' => new NFKD($config), 31 | 'Strip' => new StripNormalizer($config), 32 | 'StripAccents' => new StripAccents($config), 33 | 'Lowercase' => new Lowercase($config), 34 | 'Prepend' => new Prepend($config), 35 | /* The fallback is parenthesised because `.` binds tighter than `??`; without it the `??` never applies and a missing key raises a warning. */ default => throw new \InvalidArgumentException('Unknown normalizer type: ' . ($config['type'] ?? 'null')), 36 | }; 37 | } 38 | 39 | abstract public function normalize(string $text): string; 40 | 41 | public function __invoke(): string 42 | { 43 | return $this->normalize(...func_get_args()); 44 | } 45 | } -------------------------------------------------------------------------------- /src/Normalizers/NormalizerSequence.php: -------------------------------------------------------------------------------- 1 | normalizers = array_map( 23 | fn(array $config) => Normalizer::fromConfig($config), 24 | $config['normalizers'] 25 | ); 26 | } 27 | 28 | public function normalize(string $text): string 29 | { 30 | return array_reduce( 31 | $this->normalizers, 32 | fn(string $text, Normalizer $normalizer) => $normalizer->normalize($text), 33 | $text 34 | ); 35 | } 36 | } -------------------------------------------------------------------------------- /src/Normalizers/Prepend.php: -------------------------------------------------------------------------------- 1 | config['prepend'] . 
$text; 21 | } 22 | } -------------------------------------------------------------------------------- /src/Normalizers/Replace.php: -------------------------------------------------------------------------------- 1 | config['pattern'] ?? null; 16 | 17 | if ($pattern === null) { 18 | return $text; 19 | } 20 | 21 | $regex = $pattern['Regex'] ?? null; 22 | $string = $pattern['String'] ?? null; 23 | $replacement = $this->config['content'] ?? ''; 24 | 25 | if ($regex !== null) { 26 | return preg_replace("/{$regex}/u", $replacement, $text); 27 | } 28 | 29 | if ($string !== null) { 30 | return str_replace($string, $replacement, $text); 31 | } 32 | 33 | return $text; 34 | } 35 | } -------------------------------------------------------------------------------- /src/Normalizers/StripAccents.php: -------------------------------------------------------------------------------- 1 | config['strip_left'] && $this->config['strip_right']) { 23 | // Fast path to avoid an extra trim call 24 | $text = trim($text); 25 | } else { 26 | if ($this->config['strip_left']) { 27 | $text = ltrim($text); 28 | } 29 | if ($this->config['strip_right']) { 30 | $text = rtrim($text); 31 | } 32 | } 33 | return $text; 34 | } 35 | } -------------------------------------------------------------------------------- /src/Pipelines/SummarizationPipeline.php: -------------------------------------------------------------------------------- 1 | 'The Eiffel Tower is about the same height as an 81-storey building and the tallest structure in Paris. 
It is the second tallest free-standing structure in France after the Millau Viaduct.'] 28 | */ 29 | class SummarizationPipeline extends Text2TextGenerationPipeline 30 | { 31 | protected string $key = 'summary_text'; 32 | } -------------------------------------------------------------------------------- /src/Pipelines/TranslationPipeline.php: -------------------------------------------------------------------------------- 1 | 'La vie est comme une boîte a chocolat.'] 19 | * ``` 20 | * 21 | * **Example:** Multilingual translation with `Xenova/m2m100_418M`. 22 | * 23 | * ```php 24 | * use function Codewithkyrian\Transformers\Pipelines\pipeline; 25 | * 26 | * $translator = pipeline('translation', model: 'Xenova/m2m100_418M'); 27 | * 28 | * $output = $translator('生活就像一盒巧克力。', srcLang: 'zh', tgtLang: 'en'); // Chinese to English 29 | * // ['translation_text' => 'Life is like a box of chocolate.'] 30 | * ``` 31 | */ 32 | class TranslationPipeline extends Text2TextGenerationPipeline 33 | { 34 | protected string $key = 'translation_text'; 35 | } -------------------------------------------------------------------------------- /src/PostProcessors/ByteLevelPostProcessor.php: -------------------------------------------------------------------------------- 1 | new BertProcessing($config), 28 | 'ByteLevel' => new ByteLevelPostProcessor($config), 29 | 'TemplateProcessing' => new TemplateProcessing($config), 30 | 'RobertaProcessing' => new RobertaProcessing($config), 31 | 'Sequence' => new PostProcessorSequence($config), 32 | default => throw new \InvalidArgumentException("Unknown post-processor type {$config['type']}"), 33 | }; 34 | } 35 | 36 | /** 37 | * @param array $tokens The input tokens to be post-processed. 38 | * @param array|null $tokenPair The input tokens for the second sequence in a pair. 39 | * @param bool $addSpecialTokens Whether to add the special tokens associated with the corresponding model.
40 | * @return PostProcessedOutput 41 | */ 42 | abstract public function postProcess(array $tokens, ?array $tokenPair = null, bool $addSpecialTokens = true): PostProcessedOutput; 43 | 44 | public function __invoke(array $tokens, ...$args): PostProcessedOutput 45 | { 46 | return $this->postProcess($tokens, ...$args); 47 | } 48 | 49 | } -------------------------------------------------------------------------------- /src/PostProcessors/RobertaProcessing.php: -------------------------------------------------------------------------------- 1 | pattern = "/([$PUNCTUATION_REGEX])|\s+/u"; 25 | } 26 | 27 | protected function preTokenizeText(array|string $text, array $options): array 28 | { 29 | /* preg_split() reports failure with false, which "??" does not catch; "?:" keeps the declared array return type. */ return preg_split($this->pattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) ?: []; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/PreTokenizers/DigitsPreTokenizer.php: -------------------------------------------------------------------------------- 1 | config['individual_digits'] ? '' : '+'; 16 | 17 | $digitPattern = "[\D]+|\d$individualDigits"; 18 | 19 | $this->pattern = "/$digitPattern/u"; 20 | } 21 | 22 | public function preTokenizeText(string|array $text, array $options): array 23 | { 24 | /* PREG_PATTERN_ORDER is the preg_match_all() flag with the same value; PREG_SPLIT_NO_EMPTY belongs to preg_split(). */ preg_match_all($this->pattern, $text, $matches, PREG_PATTERN_ORDER); 25 | 26 | return $matches[0] ??
[]; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/PreTokenizers/PreTokenizerSequence.php: -------------------------------------------------------------------------------- 1 | preTokenizers = array_map( 18 | fn(array $config) => PreTokenizer::fromConfig($config), 19 | $config['pretokenizers'] 20 | ); 21 | } 22 | 23 | public function preTokenizeText(string|array $text, array $options): array 24 | { 25 | return array_reduce( 26 | $this->preTokenizers, 27 | fn($text, PreTokenizer $preTokenizer) => $preTokenizer->preTokenize($text, $options), 28 | [$text] 29 | ); 30 | } 31 | } -------------------------------------------------------------------------------- /src/PreTokenizers/PunctuationPreTokenizer.php: -------------------------------------------------------------------------------- 1 | pattern = "/[^{$PUNCTUATION_REGEX}]+|[{$PUNCTUATION_REGEX}]+/u"; 15 | } 16 | public function preTokenizeText(string|array $text, array $options): array 17 | { 18 | preg_match_all($this->pattern, $text, $matches); 19 | return $matches[0]; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/PreTokenizers/ReplacePreTokenizer.php: -------------------------------------------------------------------------------- 1 | pattern = $config['pattern'] ?? 
null; 16 | $this->content = $config['content']; 17 | } 18 | public function preTokenizeText(string|array $text, array $options): array 19 | { 20 | if($this->pattern === null) 21 | { 22 | return [$text]; 23 | } 24 | 25 | /* str_replace() returns a string for string input; wrap it so the declared array return type holds. Array input still comes back as an array. */ $replaced = str_replace($this->pattern, $this->content, $text); return is_array($replaced) ? $replaced : [$replaced]; 26 | } 27 | } -------------------------------------------------------------------------------- /src/PreTokenizers/SplitPreTokenizer.php: -------------------------------------------------------------------------------- 1 | pattern = createPattern($config['pattern'], $config['invert']); 17 | } 18 | 19 | 20 | /** 21 | * Tokenizes text by splitting it using the given pattern. 22 | */ 23 | public function preTokenizeText(string|array $text, array $options): array 24 | { 25 | if ($this->config['invert']) { 26 | preg_match_all("/$this->pattern/u", $text, $matches); 27 | return $matches[0]; 28 | } else { 29 | $result = []; 30 | $offset = 0; 31 | 32 | preg_match_all("/$this->pattern/u", $text, $matches, PREG_OFFSET_CAPTURE); 33 | 34 | foreach ($matches[0] as $match) { 35 | $fullMatch = $match[0]; 36 | $matchIndex = $match[1]; 37 | 38 | if ($offset < $matchIndex) { 39 | $result[] = substr($text, $offset, $matchIndex - $offset); 40 | } 41 | 42 | if (strlen($fullMatch) > 0) { 43 | $result[] = $fullMatch; 44 | } 45 | 46 | $offset = $matchIndex + strlen($fullMatch); 47 | } 48 | 49 | if ($offset < strlen($text)) { 50 | $result[] = substr($text, $offset); 51 | } 52 | 53 | return $result; 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/PreTokenizers/WhitespacePreTokenizer.php: -------------------------------------------------------------------------------- 1 | ' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}"; 10 | } 11 | -------------------------------------------------------------------------------- /src/PreTrainedTokenizers/Grok1Tokenizer.php:
-------------------------------------------------------------------------------- 1 | decoder = new VitsDecoder([]); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/PreTrainedTokenizers/Wav2Vec2CTCTokenizer.php: -------------------------------------------------------------------------------- 1 | resolveDtype($array); 17 | $dtype = $this->defaultFloatType; 18 | } 19 | return new Tensor($array, $dtype, $shape); 20 | } 21 | } -------------------------------------------------------------------------------- /src/Tensor/OpenBLASFactory.php: -------------------------------------------------------------------------------- 1 | $libFiles 23 | * @param array $lapackeLibs 24 | */ 25 | public function __construct( 26 | string $headerFile, 27 | array $libFiles, 28 | ) 29 | { 30 | if (self::$ffi !== null) { 31 | return; 32 | } 33 | if (!extension_loaded('ffi')) { 34 | return; 35 | } 36 | 37 | $code = file_get_contents($headerFile); /* An unreadable header leaves the factory unavailable, like the other early returns, instead of passing false on to FFI::cdef(). */ if ($code === false) { return; } 38 | 39 | foreach ($libFiles as $filename) { 40 | try { 41 | $ffi = FFI::cdef($code, $filename); 42 | } catch (FFIException $e) { 43 | continue; 44 | } 45 | 46 | self::$ffi = $ffi; 47 | break; 48 | } 49 | } 50 | 51 | public function isAvailable(): bool 52 | { 53 | return self::$ffi !== null; 54 | } 55 | 56 | public function Blas(): Blas 57 | { 58 | /* Strict comparison, matching the checks in the constructor and isAvailable(). */ if (self::$ffi === null) { 59 | throw new RuntimeException('openblas library not loaded.'); 60 | } 61 | return new Blas(self::$ffi); 62 | } 63 | 64 | public function Lapack(): PhpLapack 65 | { 66 | return new PhpLapack(); 67 | } 68 | } -------------------------------------------------------------------------------- /src/Tensor/TensorBufferFactory.php: -------------------------------------------------------------------------------- 1 | bufferFactory = new TensorBufferFactory(); 17 | 18 | $this->openblasFactory = new OpenBLASFactory( 19 | headerFile: Library::OpenBlas->header(basePath('includes')), 20 | libFiles:
[Library::OpenBlas->library(basePath('libs'))], 21 | ); 22 | 23 | $this->mathFactory = new MatlibFactory( 24 | libFiles: [Library::RindowMatlib->library(basePath('libs'))] 25 | ); 26 | } 27 | } -------------------------------------------------------------------------------- /src/Tokenizers/AddedToken.php: -------------------------------------------------------------------------------- 1 | 'undefined', 22 | self::LANCZOS => 'lanczos', 23 | self::BILINEAR => 'point', 24 | self::BICUBIC => 'cubic', 25 | self::BOX => 'box', 26 | self::HAMMING => 'hamming', 27 | }; 28 | } 29 | 30 | } -------------------------------------------------------------------------------- /tests/Expectations.php: -------------------------------------------------------------------------------- 1 | extend('toMatchArrayApproximately', function (array $expected, float $precision = 0.0001) { 7 | $actual = $this->value; 8 | 9 | expect($actual) 10 | ->toBeArray() 11 | ->and(count($actual)) 12 | ->toBe(count($expected)) 13 | ->and($actual) 14 | ->toHaveKeys(array_keys($expected)); 15 | 16 | foreach ($expected as $key => $expectedValue) { 17 | $actualValue = $actual[$key]; 18 | 19 | if (is_numeric($actualValue)) 20 | { 21 | $message = "Failed asserting that $actualValue at key $key ≈ $expectedValue (±$precision)"; 22 | expect($actualValue) 23 | ->toEqualWithDelta($expectedValue, $precision, $message); 24 | } else 25 | { 26 | $message = "Failed asserting that $actualValue at key $key ≈ $expectedValue"; 27 | expect($actualValue) 28 | ->toEqual($expectedValue, $message); 29 | } 30 | } 31 | 32 | return $this; 33 | }); 34 | -------------------------------------------------------------------------------- /tests/Pest.php: -------------------------------------------------------------------------------- 1 | setCacheDir('tests/models') 13 | ->apply(); 14 | }); 15 | 16 | it('can create a pipeline for a task', function () { 17 | $extractor = pipeline('feature-extraction'); 18 | 19 | 
expect($extractor)->toBeInstanceOf(FeatureExtractionPipeline::class); 20 | }); 21 | 22 | 23 | it('can create a pipeline for a task with a model', function () { 24 | $extractor = pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2'); 25 | 26 | expect($extractor)->toBeInstanceOf(FeatureExtractionPipeline::class); 27 | }); 28 | 29 | it('throws an exception when creating a pipeline for an unsupported task', function () { 30 | pipeline('unsupported-task'); 31 | })->throws(UnsupportedTaskException::class); -------------------------------------------------------------------------------- /tests/tensors/TensorBufferTest.php: -------------------------------------------------------------------------------- 1 | tensorBuffer = new TensorBuffer(5, Tensor::float32); 10 | }); 11 | 12 | it('throws an exception when accessing offset with invalid type', fn() => $this->tensorBuffer['offset']) 13 | ->throws(TypeError::class); 14 | 15 | it('can create a zero-sized buffer', function () { 16 | $buffer = new TensorBuffer(0, Tensor::float32); 17 | 18 | expect($buffer->count())->toBe(0); 19 | }); 20 | 21 | it('gets the correct value at the given offset using square brackets', function () { 22 | expect($this->tensorBuffer[0])->toBe(0.0) 23 | ->and($this->tensorBuffer[4])->toBe(0.0); 24 | }); 25 | 26 | it('sets the value at the given offset using square brackets', function () { 27 | $this->tensorBuffer[0] = 1.5; 28 | $this->tensorBuffer[4] = 2.5; 29 | 30 | expect($this->tensorBuffer[0])->toBe(1.5) 31 | ->and($this->tensorBuffer[4])->toBe(2.5); 32 | }); 33 | 34 | it('throws an exception when accessing out-of-range offset', fn() => $this->tensorBuffer[5]) 35 | ->throws(OutOfRangeException::class); 36 | 37 | it('throws an exception when unsetting offset using square brackets', function () { 38 | unset($this->tensorBuffer[0]); 39 | })->throws(LogicException::class); 40 | -------------------------------------------------------------------------------- /tests/tokenizers/Datasets.php: 
-------------------------------------------------------------------------------- 1 | $tests) { 7 | foreach ($tests as $test) { 8 | $label = is_string($test['input']) ? $test['input'] : json_encode($test['input']); 9 | yield "$tokenizerId: $label" => fn () => [ 10 | 'tokenizerId' => $tokenizerId, 11 | 'test' => $test 12 | ]; 13 | } 14 | } 15 | }); 16 | 17 | dataset('template-tokenization', function () { 18 | /* Fail loudly on a missing or malformed fixture instead of iterating over null and yielding no cases. */ $data = json_decode(file_get_contents(__DIR__.'/dataset-templates.json'), true, 512, JSON_THROW_ON_ERROR); 19 | 20 | foreach ($data as $tokenizerId => $tests) { 21 | foreach ($tests as $test) { 22 | $printableKeys = ['add_generation_prompt', 'tokenize']; 23 | $label = json_encode(array_intersect_key($test, array_flip($printableKeys))); 24 | yield "$tokenizerId: $label" => fn () => [ 25 | 'tokenizerId' => $tokenizerId, 26 | 'test' => $test 27 | ]; 28 | } 29 | } 30 | }); 31 | --------------------------------------------------------------------------------