├── .github
├── ISSUE_TEMPLATE
│ ├── 1_bug-report.yml
│ ├── 2_feature_request.yml
│ ├── 3_question.yml
│ └── config.yml
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ ├── changelog.yml
│ ├── docs.yml
│ └── release.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── VERSION
├── bin
└── transformers
├── composer.json
├── docs
├── .gitignore
├── .vitepress
│ └── config.mts
├── README.md
├── audio-classification.md
├── automatic-speech-recognition.md
├── basic-usage.md
├── bun.lockb
├── configuration.md
├── feature-extraction.md
├── fill-mask.md
├── getting-started.md
├── image-classification.md
├── image-feature-extraction.md
├── image-to-image.md
├── image-to-text.md
├── images
│ └── detection-example.jpg
├── index.md
├── introduction.md
├── models.md
├── object-detection.md
├── package.json
├── pipelines.md
├── question-answering.md
├── summarization.md
├── text-classification.md
├── text-generation.md
├── text-to-text-generation.md
├── token-classification.md
├── tokenizers.md
├── translation.md
├── utils
│ ├── generation.md
│ ├── image.md
│ └── tensor.md
├── zero-shot-classification.md
├── zero-shot-image-classification.md
└── zero-shot-object-detection.md
├── examples
├── .gitignore
├── bootstrap.php
├── composer.json
├── misc
│ ├── background-removal.php
│ ├── custom-object-detection.php
│ ├── general-test.php
│ └── image-test.php
├── pipelines
│ ├── asr.php
│ ├── audio-classification.php
│ ├── feature-extraction.php
│ ├── fill-mask.php
│ ├── image-classification.php
│ ├── image-feature-extraction.php
│ ├── image-to-image.php
│ ├── image-to-text.php
│ ├── object-detection.php
│ ├── question-answering.php
│ ├── sentiment-analysis.php
│ ├── summarization.php
│ ├── text-classification.php
│ ├── text-generation.php
│ ├── text2text-generation.php
│ ├── token-classification.php
│ ├── translation.php
│ ├── zero-shot-classification.php
│ ├── zero-shot-image-classification.php
│ └── zero-shot-object-detection.php
└── tokenizers
│ └── apply-chat-template.php
├── libs
└── .gitignore
├── phpunit.xml
├── scripts
├── convert.py
├── convert_upload_hf.ipynb
└── requirements.txt
├── src
├── Commands
│ ├── DownloadModelCommand.php
│ └── InstallCommand.php
├── DataStructures
│ ├── CharTrie.php
│ ├── CharTrieNode.php
│ ├── TokenLattice.php
│ └── TokenLatticeNode.php
├── Decoders
│ ├── BPEDecoder.php
│ ├── ByteFallback.php
│ ├── ByteLevelDecoder.php
│ ├── CTCDecoder.php
│ ├── Decoder.php
│ ├── DecoderSequence.php
│ ├── FuseDecoder.php
│ ├── MetaspaceDecoder.php
│ ├── ReplaceDecoder.php
│ ├── StripDecoder.php
│ ├── VitsDecoder.php
│ └── WordPieceDecoder.php
├── Exceptions
│ ├── HubException.php
│ ├── MissingModelInputException.php
│ ├── ModelExecutionException.php
│ ├── TemplateParseException.php
│ ├── TransformersException.php
│ ├── UnsupportedModelTypeException.php
│ └── UnsupportedTaskException.php
├── FFI
│ ├── Libc.php
│ ├── OnnxRuntime.php
│ ├── Samplerate.php
│ ├── Sndfile.php
│ └── TransformersUtils.php
├── FeatureExtractors
│ ├── ASTFeatureExtractor.php
│ ├── DetrFeatureExtractor.php
│ ├── FeatureExtractor.php
│ ├── ImageFeatureExtractor.php
│ ├── OwlViTFeatureExtractor.php
│ ├── Owlv2ImageProcessor.php
│ ├── Swin2SRImageProcessor.php
│ ├── ViTFeatureExtractor.php
│ ├── Wav2Vec2FeatureExtractor.php
│ └── WhisperFeatureExtractor.php
├── Generation
│ ├── AggregationStrategy.php
│ ├── LogitsProcessors
│ │ ├── BadWordsLogitsProcessor.php
│ │ ├── ForceTokensLogitsProcessor.php
│ │ ├── ForcedBOSTokenLogitsProcessor.php
│ │ ├── ForcedEOSTokenLogitsProcessor.php
│ │ ├── LogitsProcessor.php
│ │ ├── LogitsProcessorList.php
│ │ ├── MinLengthLogitsProcessor.php
│ │ ├── MinNewTokensLengthLogitsProcessor.php
│ │ ├── NoRepeatNGramLogitsProcessor.php
│ │ ├── RepetitionPenaltyLogitsProcessor.php
│ │ ├── SuppressTokensAtBeginLogitsProcessor.php
│ │ └── WhisperTimeStampLogitsProcessor.php
│ ├── Samplers
│ │ ├── BeamSearchSampler.php
│ │ ├── GreedySampler.php
│ │ ├── MultinomialSampler.php
│ │ └── Sampler.php
│ └── Streamers
│ │ ├── StdOutStreamer.php
│ │ ├── StreamMode.php
│ │ ├── Streamer.php
│ │ ├── TextStreamer.php
│ │ └── WhisperTextStreamer.php
├── Models
│ ├── Auto
│ │ ├── AutoModel.php
│ │ ├── AutoModelForAudioClassification.php
│ │ ├── AutoModelForCTC.php
│ │ ├── AutoModelForCausalLM.php
│ │ ├── AutoModelForImageClassification.php
│ │ ├── AutoModelForImageFeatureExtraction.php
│ │ ├── AutoModelForImageToImage.php
│ │ ├── AutoModelForMaskedLM.php
│ │ ├── AutoModelForObjectDetection.php
│ │ ├── AutoModelForQuestionAnswering.php
│ │ ├── AutoModelForSeq2SeqLM.php
│ │ ├── AutoModelForSequenceClassification.php
│ │ ├── AutoModelForSpeechSeq2Seq.php
│ │ ├── AutoModelForTokenClassification.php
│ │ ├── AutoModelForVision2Seq.php
│ │ ├── AutoModelForZeroShotObjectDetection.php
│ │ └── PretrainedMixin.php
│ ├── ModelArchitecture.php
│ ├── Output
│ │ ├── BaseModelOutput.php
│ │ ├── CasualLMOutput.php
│ │ ├── DetrSegmentationOutput.php
│ │ ├── MaskedLMOutput.php
│ │ ├── ModelOutput.php
│ │ ├── ObjectDetectionOutput.php
│ │ ├── QuestionAnsweringModelOutput.php
│ │ ├── SequenceClassifierOutput.php
│ │ └── TokenClassifierOutput.php
│ └── Pretrained
│ │ ├── ASTForAudioClassification.php
│ │ ├── ASTModel.php
│ │ ├── ASTPretrainedModel.php
│ │ ├── AlbertForMaskedLM.php
│ │ ├── AlbertForQuestionAnswering.php
│ │ ├── AlbertForSequenceClassification.php
│ │ ├── AlbertModel.php
│ │ ├── AlbertPretrainedModel.php
│ │ ├── BartForConditionalGeneration.php
│ │ ├── BartForSequenceClassification.php
│ │ ├── BartModel.php
│ │ ├── BartPretrainedModel.php
│ │ ├── BertForMaskedLM.php
│ │ ├── BertForQuestionAnswering.php
│ │ ├── BertForSequenceClassification.php
│ │ ├── BertForTokenClassification.php
│ │ ├── BertModel.php
│ │ ├── BertPretrainedModel.php
│ │ ├── CLIPModel.php
│ │ ├── CLIPPretrainedModel.php
│ │ ├── CLIPVisionModelWithProjection.php
│ │ ├── CodeGenForCausalLM.php
│ │ ├── CodeGenModel.php
│ │ ├── CodeGenPretrainedModel.php
│ │ ├── DebertaForMaskedLM.php
│ │ ├── DebertaForQuestionAnswering.php
│ │ ├── DebertaForSequenceClassification.php
│ │ ├── DebertaForTokenClassification.php
│ │ ├── DebertaModel.php
│ │ ├── DebertaPretrainedModel.php
│ │ ├── DebertaV2ForMaskedLM.php
│ │ ├── DebertaV2ForQuestionAnswering.php
│ │ ├── DebertaV2ForSequenceClassification.php
│ │ ├── DebertaV2ForTokenClassification.php
│ │ ├── DebertaV2Model.php
│ │ ├── DebertaV2PretrainedModel.php
│ │ ├── DeiTForImageClassification.php
│ │ ├── DeiTModel.php
│ │ ├── DeiTPretrainedModel.php
│ │ ├── DetrForObjectDetection.php
│ │ ├── DetrForSegmentation.php
│ │ ├── DetrModel.php
│ │ ├── DetrPretrainedModel.php
│ │ ├── DistilBertForMaskedLM.php
│ │ ├── DistilBertForQuestionAnswering.php
│ │ ├── DistilBertForSequenceClassification.php
│ │ ├── DistilBertModel.php
│ │ ├── GPT2LMHeadModel.php
│ │ ├── GPT2Model.php
│ │ ├── GPT2PretrainedModel.php
│ │ ├── GPTBigCodeForCausalLM.php
│ │ ├── GPTBigCodeModel.php
│ │ ├── GPTBigCodePretrainedModel.php
│ │ ├── GPTJForCausalLM.php
│ │ ├── GPTJModel.php
│ │ ├── GPTJPretrainedModel.php
│ │ ├── LlamaForCausalLM.php
│ │ ├── LlamaModel.php
│ │ ├── LlamaPretrainedModel.php
│ │ ├── M2M100ForConditionalGeneration.php
│ │ ├── M2M100Model.php
│ │ ├── M2M100PretrainedModel.php
│ │ ├── MobileBertForMaskedLM.php
│ │ ├── MobileBertForQuestionAnswering.php
│ │ ├── MobileBertForSequenceClassification.php
│ │ ├── MobileBertModel.php
│ │ ├── MobileBertPretrainedModel.php
│ │ ├── OwlViTForObjectDetection.php
│ │ ├── OwlViTModel.php
│ │ ├── OwlViTPretrainedModel.php
│ │ ├── Owlv2ForObjectDetection.php
│ │ ├── Owlv2Model.php
│ │ ├── Owlv2PretrainedModel.php
│ │ ├── PretrainedModel.php
│ │ ├── Qwen2ForCausalLM.php
│ │ ├── Qwen2Model.php
│ │ ├── Qwen2PreTrainedModel.php
│ │ ├── RoFormerForMaskedLM.php
│ │ ├── RoFormerForQuestionAnswering.php
│ │ ├── RoFormerForSequenceClassification.php
│ │ ├── RoFormerForTokenClassification.php
│ │ ├── RoFormerModel.php
│ │ ├── RoFormerPretrainedModel.php
│ │ ├── RobertaForMaskedLM.php
│ │ ├── RobertaForQuestionAnswering.php
│ │ ├── RobertaForSequenceClassification.php
│ │ ├── RobertaForTokenClassification.php
│ │ ├── RobertaModel.php
│ │ ├── RobertaPretrainedModel.php
│ │ ├── SiglipModel.php
│ │ ├── SiglipPretrainedModel.php
│ │ ├── SiglipTextModel.php
│ │ ├── SiglipVisionModel.php
│ │ ├── Swin2SRForImageSuperResolution.php
│ │ ├── Swin2SRModel.php
│ │ ├── Swin2SRPretrainedModel.php
│ │ ├── T5ForConditionalGeneration.php
│ │ ├── T5Model.php
│ │ ├── T5PretrainedModel.php
│ │ ├── TrOCRForCausalLM.php
│ │ ├── TrOCRPretrainedModel.php
│ │ ├── ViTForImageClassification.php
│ │ ├── ViTModel.php
│ │ ├── ViTPretrainedModel.php
│ │ ├── VisionEncoderDecoderModel.php
│ │ ├── Wav2Vec2ForAudioFrameClassification.php
│ │ ├── Wav2Vec2ForCTC.php
│ │ ├── Wav2Vec2ForSequenceClassification.php
│ │ ├── Wav2Vec2Model.php
│ │ ├── Wav2Vec2PretrainedModel.php
│ │ ├── WhisperForConditionalGeneration.php
│ │ ├── WhisperModel.php
│ │ ├── WhisperPretrainedModel.php
│ │ ├── YolosForObjectDetection.php
│ │ ├── YolosModel.php
│ │ └── YolosPretrainedModel.php
├── Normalizers
│ ├── BertNormalizer.php
│ ├── Lowercase.php
│ ├── NFC.php
│ ├── NFKC.php
│ ├── NFKD.php
│ ├── Normalizer.php
│ ├── NormalizerSequence.php
│ ├── Precompiled.php
│ ├── Prepend.php
│ ├── Replace.php
│ ├── StripAccents.php
│ └── StripNormalizer.php
├── Pipelines
│ ├── AudioClassificationPipeline.php
│ ├── AutomaticSpeechRecognitionPipeline.php
│ ├── FeatureExtractionPipeline.php
│ ├── FillMaskPipeline.php
│ ├── ImageClassificationPipeline.php
│ ├── ImageFeatureExtractionPipeline.php
│ ├── ImageToImagePipeline.php
│ ├── ImageToTextPipeline.php
│ ├── ObjectDetectionPipeline.php
│ ├── Pipeline.php
│ ├── QuestionAnsweringPipeline.php
│ ├── SummarizationPipeline.php
│ ├── Task.php
│ ├── Text2TextGenerationPipeline.php
│ ├── TextClassificationPipeline.php
│ ├── TextGenerationPipeline.php
│ ├── TokenClassificationPipeline.php
│ ├── TranslationPipeline.php
│ ├── ZeroShotClassificationPipeline.php
│ ├── ZeroShotImageClassificationPipeline.php
│ └── ZeroShotObjectDetectionPipeline.php
├── PostProcessors
│ ├── BertProcessing.php
│ ├── ByteLevelPostProcessor.php
│ ├── PostProcessedOutput.php
│ ├── PostProcessor.php
│ ├── PostProcessorSequence.php
│ ├── RobertaProcessing.php
│ └── TemplateProcessing.php
├── PreTokenizers
│ ├── BertPreTokenizer.php
│ ├── ByteLevelPreTokenizer.php
│ ├── DigitsPreTokenizer.php
│ ├── MetaspacePreTokenizer.php
│ ├── PreTokenizer.php
│ ├── PreTokenizerSequence.php
│ ├── PunctuationPreTokenizer.php
│ ├── ReplacePreTokenizer.php
│ ├── SplitPreTokenizer.php
│ ├── WhitespacePreTokenizer.php
│ └── WhitespaceSplit.php
├── PreTrainedTokenizers
│ ├── AlbertTokenizer.php
│ ├── AutoTokenizer.php
│ ├── BartTokenizer.php
│ ├── BertTokenizer.php
│ ├── BlenderbotSmallTokenizer.php
│ ├── BlenderbotTokenizer.php
│ ├── BloomTokenizer.php
│ ├── CLIPTokenizer.php
│ ├── CamembertTokenizer.php
│ ├── CodeGenTokenizer.php
│ ├── CodeLlamaTokenizer.php
│ ├── CohereTokenizer.php
│ ├── ConvBertTokenizer.php
│ ├── DebertaTokenizer.php
│ ├── DebertaV2Tokenizer.php
│ ├── DistilBertTokenizer.php
│ ├── ElectraTokenizer.php
│ ├── EsmTokenizer.php
│ ├── FalconTokenizer.php
│ ├── GPT2Tokenizer.php
│ ├── GPTNeoXTokenizer.php
│ ├── GemmaTokenizer.php
│ ├── Grok1Tokenizer.php
│ ├── HerbertTokenizer.php
│ ├── LlamaTokenizer.php
│ ├── M2M100Tokenizer.php
│ ├── MBart50Tokenizer.php
│ ├── MBartTokenizer.php
│ ├── MPNetTokenizer.php
│ ├── MobileBertTokenizer.php
│ ├── NllbTokenizer.php
│ ├── NougatTokenizer.php
│ ├── PreTrainedTokenizer.php
│ ├── Qwen2Tokenizer.php
│ ├── RoFormerTokenizer.php
│ ├── RobertaTokenizer.php
│ ├── SiglipTokenizer.php
│ ├── SpeechT5Tokenizer.php
│ ├── SqueezeBertTokenizer.php
│ ├── T5Tokenizer.php
│ ├── VitsTokenizer.php
│ ├── Wav2Vec2CTCTokenizer.php
│ ├── WhisperTokenizer.php
│ ├── XLMRobertaTokenizer.php
│ └── XLMTokenizer.php
├── Processors
│ ├── AutoProcessor.php
│ ├── OwlViTProcessor.php
│ ├── Processor.php
│ ├── Wav2Vec2ProcessorWithLM.php
│ └── WhisperProcessor.php
├── Tensor
│ ├── MatrixOperator.php
│ ├── OpenBLASFactory.php
│ ├── Tensor.php
│ ├── TensorBuffer.php
│ ├── TensorBufferFactory.php
│ └── TensorService.php
├── Tokenizers
│ ├── AddedToken.php
│ ├── BPEModel.php
│ ├── BPENode.php
│ ├── LegacyModel.php
│ ├── TokenizerModel.php
│ ├── UnigramModel.php
│ └── WordPieceModel.php
├── Transformers.php
└── Utils
│ ├── Audio.php
│ ├── AutoConfig.php
│ ├── Downloader.php
│ ├── GenerationConfig.php
│ ├── Helpers.php
│ ├── Hub.php
│ ├── Image.php
│ ├── ImageDriver.php
│ ├── InferenceSession.php
│ ├── LibsChecker.php
│ ├── Math.php
│ ├── Resample.php
│ └── StreamLogger.php
└── tests
├── Expectations.php
├── Pest.php
├── PipelineTest.php
├── Utils
├── HubTest.php
└── StreamLoggerTest.php
├── tensors
├── TensorBufferTest.php
└── TensorTest.php
└── tokenizers
├── Datasets.php
├── TokenizersTest.php
├── dataset-regular.json
└── dataset-templates.json
/.github/ISSUE_TEMPLATE/2_feature_request.yml:
--------------------------------------------------------------------------------
1 | name: " Feature request"
2 | description: Submit a proposal/request for a new Transformers PHP feature
3 | labels: [ "enhancement" ]
4 | body:
5 | - type: dropdown
6 | id: feature-type
7 | attributes:
8 | label: Type of feature request
9 | description: "What kind of feature are you requesting?"
10 | options:
11 | - "🌟New Model"
12 | - "🔧New Pipeline"
13 | - "🚀Enhancement"
14 | - "📦Other (please specify)"
15 | validations:
16 | required: true
17 |
18 | - type: textarea
19 | id: feature-description
20 | validations:
21 | required: true
22 | attributes:
23 | label: Feature description
24 | description: |
25 | Please provide a clear and concise description of the feature you are requesting. If the feature is related to a new model or pipeline, include details like its functionality and purpose.
26 | For enhancements, describe the desired change and its benefits.
27 |
28 | - type: textarea
29 | id: motivation
30 | validations:
31 | required: true
32 | attributes:
33 | label: Motivation
34 | description: |
35 | Please outline the motivation for the proposal. Why is it important that we add this feature? What is your intended use case?
36 |
37 | - type: textarea
38 | id: contribution
39 | validations:
40 | required: true
41 | attributes:
42 | label: Your contribution
43 | description: |
44 | Is there any way that you could help, e.g. by submitting a PR?
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/3_question.yml:
--------------------------------------------------------------------------------
1 | name: "❓ Question about Transformers PHP"
2 | description: Ask your questions about using Transformers PHP
3 | labels: [ "question" ]
4 | body:
5 | - type: textarea
6 | id: question
7 | validations:
8 | required: true
9 | attributes:
10 | label: Your question
11 | description: Please clearly state your question about using Transformers PHP. The more details you provide, the better we can assist you.
12 | placeholder: "For example, I'm having trouble understanding how to use the [Model Name] model for sentiment analysis. Can you please provide some guidance?"
13 |
14 | - type: textarea
15 | id: context
16 | validations:
17 | required: false # Optional field
18 | attributes:
19 | label: Context (optional)
20 | description: If necessary, provide additional context about your question. This could include - The specific functionality you're trying to achieve, any code snippets you're working with, or error messages you're encountering (if applicable).
21 |
22 | - type: input
23 | id: reference
24 | validations:
25 | required: false # Optional field
26 | attributes:
27 | label: Reference (optional)
28 | description: If your question relates to specific documentation or code examples, please provide a link here.
29 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | version: 2.1
3 | contact_links:
4 | - name: Documentation
5 | url: https://codewithkyrian.github.io/transformers-php
6 | about: Read the Transformers PHP documentation
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
4 |
5 | ### What:
6 |
7 | - [ ] Bug Fix
8 | - [ ] New Feature
9 |
10 | ### Description:
11 |
12 |
13 |
14 | ### Related:
15 |
16 |
17 |
--------------------------------------------------------------------------------
/.github/workflows/changelog.yml:
--------------------------------------------------------------------------------
1 | name: "Update Changelog"
2 |
3 | on:
4 | release:
5 | types: [prereleased, released]
6 |
7 | permissions:
8 | contents: write
9 |
10 | jobs:
11 | update:
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - name: Checkout code
16 | uses: actions/checkout@v4
17 | with:
18 | ref: main
19 |
20 | - name: Update Changelog
21 | uses: stefanzweifel/changelog-updater-action@v1
22 | with:
23 | latest-version: ${{ github.event.release.name }}
24 | release-notes: ${{ github.event.release.body }}
25 |
26 | - name: Commit updated CHANGELOG
27 | uses: stefanzweifel/git-auto-commit-action@v5
28 | with:
29 | branch: main
30 | commit_message: Update CHANGELOG
31 | file_pattern: CHANGELOG.md
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: Deploy Docs site to Pages
2 |
3 | on:
4 | push:
5 | branches: [main]
6 |
7 | workflow_dispatch:
8 |
9 | permissions:
10 | contents: read
11 | pages: write
12 | id-token: write
13 |
14 | concurrency:
15 | group: pages
16 | cancel-in-progress: false
17 |
18 | jobs:
19 |
20 | build:
21 | runs-on: ubuntu-latest
22 | steps:
23 | - name: Checkout
24 | uses: actions/checkout@v4
25 | with:
26 | fetch-depth: 0 # Not needed if lastUpdated is not enabled
27 |
28 | - name: Setup Bun
29 | uses: oven-sh/setup-bun@v1 # Bun installs the dependencies and runs the docs build below
30 |
31 | - name: Setup Pages
32 | uses: actions/configure-pages@v4
33 |
34 | - name: Install dependencies
35 | working-directory: docs
36 | run: bun install
37 |
38 | - name: Build with VitePress
39 | working-directory: docs
40 | run: bun run docs:build
41 |
42 | - name: Upload artifact
43 | uses: actions/upload-pages-artifact@v3
44 | with:
45 | path: docs/.vitepress/dist
46 |
47 | deploy:
48 | environment:
49 | name: github-pages
50 | url: ${{ steps.deployment.outputs.page_url }}
51 | needs: build
52 | runs-on: ubuntu-latest
53 | name: Deploy
54 | steps:
55 | - name: Deploy to GitHub Pages
56 | id: deployment
57 | uses: actions/deploy-pages@v4
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
# Builds the native libraries inside the project's GHCR image and attaches the
# resulting files to a GitHub release.
#
# Triggers:
#   * release published   -> the tag comes from the release ref
#   * workflow_dispatch   -> the tag comes from the required `tag` input
name: Build and Release Libraries

permissions:
  contents: write
  packages: read

on:
  release:
    types:
      - published

  workflow_dispatch:
    inputs:
      tag:
        description: 'Release Tag'
        required: true


jobs:
  add-libs:
    runs-on: ubuntu-latest

    env:
      # Resolve the tag once. It is handed to the shell as an environment
      # variable instead of being interpolated into the script text, so a
      # crafted `tag` input cannot inject shell commands.
      TAG: ${{ startsWith(github.ref, 'refs/tags/') && github.ref_name || github.event.inputs.tag }}

    steps:
      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build Libraries
        # `-e TAG` forwards the TAG variable from the job environment as-is.
        # The bind mount uses an absolute path so it does not depend on the
        # Docker CLI accepting relative host paths.
        run: |
          docker run --rm -v "$PWD/libs:/libs" -e TAG ghcr.io/codewithkyrian/transformers-php:latest
          ls libs

      - name: Add Libraries to Release
        uses: softprops/action-gh-release@v2
        with:
          # Explicit tag so manual runs started from a branch still target the
          # requested release; for release events this equals the release tag.
          tag_name: ${{ env.TAG }}
          files: |
            libs/*
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .phpunit.cache
2 | .phpunit.result.cache
3 | .php-cs-fixer.cache
4 | .php-cs-fixer.php
5 |
6 | composer.lock
7 | /vendor/
8 |
9 | .DS_Store
10 | Thumbs.db
11 |
12 | *.swp
13 | *.swo
14 | playground/*
15 |
16 | .idea
17 | .fleet
18 | .vscode
19 |
20 | .transformers-cache/*
21 | tests/models/*
22 | dist
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 0.5.3
--------------------------------------------------------------------------------
/bin/transformers:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | setName('Transformers PHP CLI');
14 |
15 | $application->add(new Codewithkyrian\Transformers\Commands\InstallCommand());
16 | $application->add(new Codewithkyrian\Transformers\Commands\DownloadModelCommand());
17 |
18 | $application->run();
19 | } catch (Exception $e) {
20 | echo $e->getMessage();
21 | exit(1);
22 | }
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | ### OSX ###
2 | # General
3 | .DS_Store
4 | .AppleDouble
5 | .LSOverride
6 |
7 | # Thumbnails
8 | ._*
9 |
10 |
11 | # Files that might appear in the root of a volume
12 | .DocumentRevisions-V100
13 | .fseventsd
14 | .Spotlight-V100
15 | .TemporaryItems
16 | .Trashes
17 | .VolumeIcon.icns
18 | .com.apple.timemachine.donotpresent
19 | .idea
20 |
21 | # Directories potentially created on remote AFP share
22 | .AppleDB
23 | .AppleDesktop
24 | Network Trash Folder
25 | Temporary Items
26 | .apdisk
27 |
28 | ### Node ###
29 | # Logs
30 | logs
31 | *.log
32 | npm-debug.log*
33 | yarn-debug.log*
34 | yarn-error.log*
35 | lerna-debug.log*
36 |
37 | # Dependency directories
38 | node_modules/
39 | jspm_packages/
40 |
41 | # dotenv environment variables file
42 | .env
43 | .env.test
44 |
45 | # vitepress build output
46 | .vitepress/dist
47 | .vitepress/cache
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # TransformersPHP Documentation
2 |
3 | Welcome to the official documentation for TransformersPHP. You can find the online version of this documentation
4 | at [https://codewithkyrian.github.io/transformers-php](https://codewithkyrian.github.io/transformers-php).
5 |
6 | ## Contributing
7 |
8 | If you would like to contribute to the documentation, create a pull request with your changes. The documentation is
9 | written in Markdown so it should be easy to understand and contribute to.
--------------------------------------------------------------------------------
/docs/bun.lockb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CodeWithKyrian/transformers-php/6609377bb44275d8a2c8936ec30d62e430f836de/docs/bun.lockb
--------------------------------------------------------------------------------
/docs/images/detection-example.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CodeWithKyrian/transformers-php/6609377bb44275d8a2c8936ec30d62e430f836de/docs/images/detection-example.jpg
--------------------------------------------------------------------------------
/docs/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "devDependencies": {
3 | "vitepress": "^1.0.0-rc.45"
4 | },
5 | "scripts": {
6 | "docs:dev": "vitepress dev",
7 | "docs:build": "vitepress build",
8 | "docs:preview": "vitepress preview"
9 | }
10 | }
--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | vendor
2 | .transformers-cache/*
3 | composer.lock
4 | paddleocr
--------------------------------------------------------------------------------
/examples/bootstrap.php:
--------------------------------------------------------------------------------
1 | setCacheDir('/Users/Kyrian/.transformers')
13 | ->setImageDriver(ImageDriver::VIPS)
14 | ->setLogger(new StreamLogger(STDOUT));
15 |
--------------------------------------------------------------------------------
/examples/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "kyrian/examples",
3 | "autoload": {
4 | "psr-4": {
5 | "Kyrian\\Examples\\": "/"
6 | }
7 | },
8 | "authors": [
9 | {
10 | "name": "Kyrian Obikwelu",
11 | "email": "koshnawaza@gmail.com"
12 | }
13 | ],
14 | "require": {
15 | "php": "^8.1",
16 | "symfony/console": "^7.0",
17 | "codewithkyrian/transformers": "*"
18 | },
19 | "require-dev": {
20 | "symfony/var-dumper": "^7.0"
21 | },
22 | "minimum-stability": "dev",
23 | "repositories": [
24 | {
25 | "type": "path",
26 | "url": "../"
27 | }
28 | ],
29 | "config": {
30 | "allow-plugins": {
31 | "codewithkyrian/transformers-libraries-downloader": true,
32 | "codewithkyrian/transformers-libsloader": true
33 | }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/examples/misc/background-removal.php:
--------------------------------------------------------------------------------
1 | $pixelValues] = $processor($image);
22 |
23 | ['output' => $output] = $model(['input' => $pixelValues]);
24 | //
25 | $mask = Image::fromTensor($output[0]->multiply(255))->resize($image->width(), $image->height());
26 | //
27 | $mask->save($fileName . '-mask.png');
28 | //
29 | $maskedImage = $image->applyMask($mask);
30 | //
31 | //$maskedImage->save($fileName . '-masked.png');
--------------------------------------------------------------------------------
/examples/misc/general-test.php:
--------------------------------------------------------------------------------
1 | setImageDriver($imageDriver)
21 | ->apply();
22 |
23 | $url = __DIR__.'/../images/kyrian-cartoon.jpeg';
24 | $tensor = Image::read($url)
25 | ->rgb()
26 | ->thumbnail(101, 101)
27 | ->toTensor();
28 |
29 | dump("$imageDriver->name (toTensor) : ".timeUsage(true));
30 |
31 | return $tensor;
32 | }
33 |
/**
 * Timing helper for the image-driver comparison: activates the given driver,
 * rebuilds an image from the tensor, and dumps the driver name together with
 * the value returned by timeUsage(true).
 *
 * @param ImageDriver $imageDriver Driver to activate before the conversion.
 * @param Tensor      $tensor      Tensor to convert back into an image.
 *
 * @return Image The image produced by Image::fromTensor().
 */
function fromTensorTest(ImageDriver $imageDriver, Tensor $tensor): Image
{
    // Keep the fluent chain intact: apply() is called on whatever
    // setImageDriver() returns.
    $configuration = Transformers::setup()->setImageDriver($imageDriver);
    $configuration->apply();

    $converted = Image::fromTensor($tensor);

    // The label is built first, then timeUsage(true) is read after the conversion.
    $label = "{$imageDriver->name} (fromTensor) : ";
    dump($label . timeUsage(true));

    return $converted;
}
46 |
47 |
48 | // Run the test
49 | dump("------------ toTensor ------------");
50 | $tensor = toTensorTest(ImageDriver::IMAGICK);
51 | $tensor = toTensorTest(ImageDriver::GD);
52 | $tensor = toTensorTest(ImageDriver::VIPS);
53 |
54 |
55 | dump("------------ fromTensor ------------");
56 | $image = fromTensorTest(ImageDriver::IMAGICK, $tensor);
57 | $image = fromTensorTest(ImageDriver::GD, $tensor);
58 | $image = fromTensorTest(ImageDriver::VIPS, $tensor);
59 |
60 | // Save the image
61 | //$image->save('images/images/kyrian-cartoon-converted.jpeg');
62 |
--------------------------------------------------------------------------------
/examples/pipelines/asr.php:
--------------------------------------------------------------------------------
1 | onStream(fn($text) => print($text));
30 |
31 |
32 | $output = $transcriber($audioUrl,
33 | maxNewTokens: 256,
34 | chunkLengthSecs: 24,
35 | streamer: $streamer,
36 | );
37 |
38 | dd($output, timeUsage(), memoryUsage());
39 |
--------------------------------------------------------------------------------
/examples/pipelines/audio-classification.php:
--------------------------------------------------------------------------------
1 | developer.');
16 |
17 |
18 | dd($result);
19 |
20 |
--------------------------------------------------------------------------------
/examples/pipelines/image-classification.php:
--------------------------------------------------------------------------------
1 | tokenizer);
16 |
17 | $url = __DIR__ . '/../images/beach.png';
18 | //$url = __DIR__. '/../images/handwriting.jpg';
19 | //$url = __DIR__. '/../images/handwriting3.png';
20 | //$url = __DIR__ . '/../images/handwriting4.jpeg';
21 |
22 | $output = $captioner($url);
23 |
24 | dd($output, timeUsage(), memoryUsage());
--------------------------------------------------------------------------------
/examples/pipelines/object-detection.php:
--------------------------------------------------------------------------------
1 | drawRectangle($box['xmin'], $box['ymin'], $box['xmax'], $box['ymax'], '0099FF', thickness: 2);
27 | // $image = $image->drawText($item['label'], $box['xmin'], max($box['ymin'] - 5, 0), '/Users/Kyrian/Library/Fonts/JosefinSans-Bold.ttf', 14, '0099FF');
28 | //}
29 | //
30 | //$image->save(__DIR__ . '/../images/cats-detection.jpg');
31 |
32 |
33 |
--------------------------------------------------------------------------------
/examples/pipelines/question-answering.php:
--------------------------------------------------------------------------------
1 | shouldSkipPrompt();
21 |
22 | $messages = [
23 | ['role' => 'system', 'content' => 'You are a helpful assistant.'],
24 | ['role' => 'user', 'content' => 'What is diffusion?'],
25 | ];
26 |
27 | $input = $generator->tokenizer->applyChatTemplate($messages, addGenerationPrompt: true, tokenize: false);
28 |
29 | $output = $generator($input,
30 | streamer: $streamer,
31 | maxNewTokens: 256,
32 | doSample: true,
33 | returnFullText: false,
34 | // temperature: 0.7,
35 | // repetitionPenalty: 1.3,
36 | // earlyStopping: true
37 | );
38 |
39 | //$generator = pipeline('text-generation', 'Xenova/codegen-350M-mono');
40 | //$streamer = TextStreamer::make();
41 |
42 | //$output = $generator(
43 | // 'def fib(n):',
44 | // streamer: $streamer,
45 | // maxNewTokens: 100,
46 | // doSample: true,
47 | // returnFullText: true,
48 | //);
49 |
50 | dd($output[0]['generated_text'], timeUsage(), memoryUsage());
51 |
--------------------------------------------------------------------------------
/examples/pipelines/text2text-generation.php:
--------------------------------------------------------------------------------
1 | 'user', 'content' => 'Hello!'],
13 | ['role' => 'assistant', 'content' => 'Hi! How are you?'],
14 | ['role' => 'user', 'content' => 'I am doing great.'],
15 | ['role' => 'assistant', 'content' => 'That is great to hear.'],
16 | ];
17 |
18 | $text = $tokenizer->applyChatTemplate($messages, addGenerationPrompt: true, tokenize: false);
19 |
20 | dd($text);
21 |
--------------------------------------------------------------------------------
/libs/.gitignore:
--------------------------------------------------------------------------------
1 | /*
2 | !VERSIONS
--------------------------------------------------------------------------------
/phpunit.xml:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
9 | ./tests
10 |
11 |
12 |
13 |
14 | ./app
15 | ./src
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/scripts/requirements.txt:
--------------------------------------------------------------------------------
1 | onnxruntime<1.16.0
2 | transformers[torch]==4.33.2
3 | optimum==1.13.2
4 | tqdm
5 | onnx==1.13
6 |
--------------------------------------------------------------------------------
/src/DataStructures/CharTrieNode.php:
--------------------------------------------------------------------------------
1 | children[$ch] ??= CharTrieNode::default();
32 |
33 | return $this->children[$ch];
34 | }
35 | }
--------------------------------------------------------------------------------
/src/DataStructures/TokenLatticeNode.php:
--------------------------------------------------------------------------------
1 | tokenId, $this->nodeId, $this->pos, $this->length, $this->score);
40 | $n->prev = $this->prev;
41 | $n->backtraceScore = $this->backtraceScore;
42 | return $n;
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/Decoders/BPEDecoder.php:
--------------------------------------------------------------------------------
1 | suffix = $config['suffix'];
20 | }
21 |
/**
 * Replaces the configured suffix marker in every token: with an empty string
 * for the token whose key equals `count($tokens) - 1`, and with a single
 * space for all other tokens.
 *
 * @param array $tokens Tokens to decode.
 * @return array Re-indexed list of decoded tokens.
 */
protected function decodeChain(array $tokens): array
{
    $lastKey = count($tokens) - 1;

    return array_map(
        fn(string $piece, int $key) => str_replace($this->suffix, $key === $lastKey ? '' : ' ', $piece),
        $tokens,
        array_keys($tokens)
    );
}
28 | }
--------------------------------------------------------------------------------
/src/Decoders/DecoderSequence.php:
--------------------------------------------------------------------------------
1 | decoders = array_map(
23 | fn(array $decoderConfig) => Decoder::fromConfig($decoderConfig),
24 | $config['decoders']
25 | );
26 | }
27 |
/**
 * Runs the tokens through every configured decoder in order, feeding the
 * output of one decoder into the next.
 *
 * @param array $tokens Tokens to decode.
 * @return array Tokens after the last decoder has been applied.
 */
protected function decodeChain(array $tokens): array
{
    $current = $tokens;

    foreach ($this->decoders as $decoder) {
        $current = $decoder->decodeChain($current);
    }

    return $current;
}
36 | }
--------------------------------------------------------------------------------
/src/Decoders/FuseDecoder.php:
--------------------------------------------------------------------------------
1 | addPrefixSpace = $config['add_prefix_space'] ?? false;
31 | $this->replacement = $config['replacement'] ?? '';
32 | }
33 |
/**
 * Turns every occurrence of the replacement marker into a space and, when
 * `addPrefixSpace` is enabled, drops one leading space from the token at
 * key 0.
 *
 * @param array $tokens Tokens to decode.
 * @return array Re-indexed list of decoded tokens.
 */
protected function decodeChain(array $tokens): array
{
    return array_map(
        function ($token, $position) {
            $text = str_replace($this->replacement, ' ', $token);

            $stripLeadingSpace = $this->addPrefixSpace
                && $position == 0
                && str_starts_with($text, ' ');

            return $stripLeadingSpace ? substr($text, 1) : $text;
        },
        $tokens,
        array_keys($tokens)
    );
}
50 | }
--------------------------------------------------------------------------------
/src/Decoders/ReplaceDecoder.php:
--------------------------------------------------------------------------------
1 | config['pattern'] ?? null;
21 |
22 | if ($pattern === null) {
23 | return $tokens;
24 | }
25 |
26 | $regex = $pattern['Regex'] ?? null;
27 | $string = $pattern['String'] ?? null;
28 | $replacement = $this->config['content'] ?? '';
29 |
30 | return array_map(function ($token) use ($regex, $string, $replacement) {
31 | if ($regex !== null) {
32 | return preg_replace("/{$regex}/u", $replacement, (string)$token);
33 | }
34 | if ($string !== null) {
35 | return str_replace($string, $replacement, (string)$token);
36 | }
37 | return $token;
38 | }, $tokens);
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/Decoders/StripDecoder.php:
--------------------------------------------------------------------------------
1 | content = $config['content'];
22 | $this->start = $config['start'];
23 | $this->stop = $config['stop'];
24 | }
25 |
/**
 * Removes up to `start` leading and up to `stop` trailing occurrences of the
 * configured `content` character from each token.
 *
 * Stripping on either side stops at the first character that differs from
 * `content`. All offsets are character (not byte) offsets.
 *
 * @param array $tokens Tokens to decode.
 * @return array Tokens with the matching edge characters removed.
 */
protected function decodeChain(array $tokens): array
{
    return array_map(function ($token) {
        $token = (string)$token;
        $length = mb_strlen($token);

        // Count matching characters at the front, at most `start` of them.
        $startCut = 0;
        while (
            $startCut < $this->start
            && $startCut < $length
            && mb_substr($token, $startCut, 1) === $this->content
        ) {
            ++$startCut;
        }

        // Walk backwards from the end, at most `stop` characters.
        // The previous `$token[$index] ?? null === $this->content` parsed as
        // `$token[$index] ?? (null === $this->content)`, so it never compared
        // the character with `content`, and it indexed bytes with a character offset.
        $stopCut = $length;
        for ($i = 0; $i < $this->stop; ++$i) {
            $index = $length - $i - 1;
            if ($index < 0 || mb_substr($token, $index, 1) !== $this->content) {
                break;
            }
            $stopCut = $index;
        }

        // When both cuts overlap the result is empty; a negative length would
        // otherwise be interpreted by mb_substr as "stop N characters before the end".
        return mb_substr($token, $startCut, max(0, $stopCut - $startCut));
    }, $tokens);
}
54 | }
55 |
--------------------------------------------------------------------------------
/src/Decoders/VitsDecoder.php:
--------------------------------------------------------------------------------
1 | cleanup = $config['cleanup'];
19 | }
20 |
/**
 * Joins sub-word tokens: every token after the first either has its leading
 * continuation prefix removed or is prefixed with a space. When `cleanup` is
 * enabled each token is additionally passed through
 * TokenizerModel::cleanUpTokenization().
 *
 * @param array $tokens Tokens to decode.
 * @return array Re-indexed list of decoded tokens.
 */
protected function decodeChain(array $tokens): array
{
    $decodedTokens = [];

    foreach ($tokens as $i => $token) {
        if ($i !== 0) {
            $token = (string)$token;
            $prefix = $this->config['prefix'];

            if (str_starts_with($token, $prefix)) {
                // Remove only the leading prefix. str_replace() would also strip
                // every later occurrence of the prefix inside the token.
                $token = substr($token, strlen($prefix));
            } else {
                $token = ' ' . $token;
            }
        }

        if ($this->cleanup) {
            $token = TokenizerModel::cleanUpTokenization($token);
        }

        $decodedTokens[] = $token;
    }

    return $decodedTokens;
}
42 | }
43 |
--------------------------------------------------------------------------------
/src/Exceptions/HubException.php:
--------------------------------------------------------------------------------
1 | FFI::cdef(
32 | "\nsize_t mbstowcs(void *wcstr, const char *mbstr, size_t count);",
33 | 'msvcrt.dll'
34 | ),
35 | default => FFI::cdef()
36 | };
37 | }
38 |
39 | return self::$ffi;
40 | }
41 |
/**
 * Creates a C data structure through the shared FFI instance.
 *
 * The arguments are forwarded unchanged to the `new()` method of the
 * instance returned by self::ffi().
 *
 * @param mixed $type       Type to allocate, passed through as given.
 * @param bool  $owned      Forwarded as the `owned` argument.
 * @param bool  $persistent Forwarded as the `persistent` argument.
 * @return CData|null
 */
public static function new($type, bool $owned = true, bool $persistent = false): ?CData
{
    $ffi = self::ffi();

    return $ffi->new($type, $owned, $persistent);
}
46 |
/**
 * Converts a multibyte string into the given wide-character buffer by
 * calling `mbstowcs` through FFI.
 *
 * The value returned by `mbstowcs` is compared against the byte length of
 * `$mbStr`; any mismatch is reported as an error.
 *
 * @param CData  $wcStr Destination buffer, returned on success.
 * @param string $mbStr Source string.
 * @param int    $count Passed to `mbstowcs` as its `count` argument.
 * @return CData The destination buffer `$wcStr`.
 * @throws RuntimeException When `mbstowcs` returns a value different from strlen($mbStr).
 */
public static function mbStringToWcString(CData $wcStr, string $mbStr, int $count): CData
{
    $length = self::ffi()->mbstowcs($wcStr, $mbStr, $count);

    if ($length == strlen($mbStr)) {
        return $wcStr;
    }

    throw new RuntimeException('Expected mbstowcs to return '.strlen($mbStr).", got $length");
}
57 |
/**
 * Copies a PHP string into a newly allocated, NUL-terminated `char` array.
 *
 * The array is created with `owned: false`.
 * NOTE(review): that presumably leaves freeing to the caller; the TODO below
 * suggests this is unresolved.
 *
 * @param string $str String to copy.
 * @return CData A `char[strlen($str) + 1]` buffer holding the bytes of `$str` followed by "\0".
 */
public static function cstring($str): CData
{
    $length = strlen($str);
    $bytes = $length + 1;

    // TODO fix?
    $buffer = self::new("char[$bytes]", owned: false);

    FFI::memcpy($buffer, $str, $length);
    $buffer[$length] = "\0";

    return $buffer;
}
68 | }
--------------------------------------------------------------------------------
/src/FeatureExtractors/FeatureExtractor.php:
--------------------------------------------------------------------------------
1 | shape();
25 | } else {
26 | [$imageHeight, $imageWidth, $imageChannels] = $imageTensor->shape();
27 | }
28 |
29 | // NOTE: For Swin2SR models, the original python implementation adds padding even when the image's width/height is already
30 | // a multiple of `pad_size`. However, this is most likely a bug (PR: https://github.com/mv-lab/swin2sr/pull/19).
31 | // For this reason, we only add padding when the image's width/height is not a multiple of `pad_size`.
32 | $padSize = [
33 | 'width' => $imageWidth + ($padSize - $imageWidth % $padSize) % $padSize,
34 | 'height' => $imageHeight + ($padSize - $imageHeight % $padSize) % $padSize,
35 | ];
36 |
37 | return parent::padImage($imageTensor, $padSize, $tensorFormat,'symmetric', false, -1);
38 | }
39 | }
--------------------------------------------------------------------------------
/src/FeatureExtractors/ViTFeatureExtractor.php:
--------------------------------------------------------------------------------
1 | config['do_normalize'])
22 | {
23 | $mean = $waveform->mean();
24 |
25 | //calculate the variance
26 | // $variance = $waveform->add(-$mean)->pow(2)->mean();
27 | $variance = 0;
28 | for ($i = 0; $i < $waveform->size(); $i++) {
29 | $variance += pow($waveform[$i] - $mean, 2);
30 | }
31 | $variance /= $waveform->size();
32 |
33 | //normalize the waveform
34 | $waveform = $waveform->add(-$mean)->multiply(1.0 / sqrt($variance + 1e-7));
35 | }
36 |
37 | $shape = [1, $waveform->size()];
38 |
39 | return [
40 | 'input_values' => $waveform->reshape($shape),
41 | 'attention_mask' => Tensor::ones($shape, dtype: Tensor::int64)
42 | ];
43 | }
44 | }
--------------------------------------------------------------------------------
/src/Generation/AggregationStrategy.php:
--------------------------------------------------------------------------------
1 | badWordsIds = $badWordsIds;
19 | $this->eosTokenId = is_array($eosTokenId) ? $eosTokenId : [$eosTokenId];
20 | }
21 |
/**
 * Bans the final token of every bad-word sequence whose preceding tokens
 * match the end of the generated sequence, by setting its logit to -INF.
 *
 * A single-token bad word has no preceding tokens and is therefore always
 * banned. A sequence is skipped when fewer tokens have been generated than
 * its prefix requires, so it cannot be banned without an actual match.
 *
 * @param array  $inputIds Token ids generated so far.
 * @param Tensor $logits   Logits for the next token; modified in place.
 * @return Tensor The same `$logits` instance.
 */
public function __invoke(array $inputIds, Tensor $logits): Tensor
{
    $inputCount = count($inputIds);

    foreach ($this->badWordsIds as $badWordIds) {
        $badWordLength = count($badWordIds);
        // Number of tokens that must already have been generated for a match.
        $prefixLength = $badWordLength - 1;

        // Nothing to ban for an empty sequence, and no match is possible when
        // the history is shorter than the prefix.
        if ($badWordLength === 0 || $inputCount < $prefixLength) {
            continue;
        }

        // Whether to modify the logits of the last token in the bad word id sequence
        $mark = true;

        // Compare the prefix with the tail of the input ids, walking backwards.
        for ($i = 1; $i <= $prefixLength; ++$i) {
            if ($badWordIds[$badWordLength - $i - 1] !== array_slice($inputIds, -$i, 1)[0]) {
                $mark = false;
                break;
            }
        }

        if ($mark) {
            $lastBadWordId = array_pop($badWordIds);
            $logits->buffer()[$lastBadWordId] = -INF;
        }
    }

    return $logits;
}
48 | }
--------------------------------------------------------------------------------
/src/Generation/LogitsProcessors/ForceTokensLogitsProcessor.php:
--------------------------------------------------------------------------------
1 | forceTokenMap = array_column($forcedDecoderIds, 1, 0);
20 | }
21 |
/**
 * Forces a specific token at generation positions listed in the force-token map.
 *
 * The map is keyed by the number of input ids. When it holds a token id for
 * the current count, every logit is set to -INF and the logit of that token
 * is set to 0.
 *
 * @param array  $inputIds The input IDs; only their count is used.
 * @param Tensor $logits   The logits to process; modified in place.
 * @return Tensor The processed logits.
 */
public function __invoke(array $inputIds, Tensor $logits): Tensor
{
    $forcedTokenId = $this->forceTokenMap[count($inputIds)] ?? null;

    // Compare against null explicitly: token id 0 is a valid id and must not
    // be skipped by a truthiness check.
    if ($forcedTokenId !== null) {
        Tensor::mo()->la()->fill(-INF, $logits);

        $logits->buffer()[$forcedTokenId] = 0;
    }

    return $logits;
}
41 | }
--------------------------------------------------------------------------------
/src/Generation/LogitsProcessors/ForcedBOSTokenLogitsProcessor.php:
--------------------------------------------------------------------------------
1 | la()->fill(-INF, $logits);
29 | $logits->buffer()[$this->bosTokenId] = 0;
30 | }
31 | return $logits;
32 | }
33 | }
--------------------------------------------------------------------------------
/src/Generation/LogitsProcessors/ForcedEOSTokenLogitsProcessor.php:
--------------------------------------------------------------------------------
1 | = $this->maxLength) {
25 | Tensor::mo()->la()->fill(-INF, $logits);
26 | $logits->buffer()[$this->forcedEosTokenId] = 0;
27 | }
28 | return $logits;
29 | }
30 | }
--------------------------------------------------------------------------------
/src/Generation/LogitsProcessors/LogitsProcessor.php:
--------------------------------------------------------------------------------
1 | eosTokenId = [$eosTokenId];
27 | }
28 | }
29 |
/**
 * Sets the logit of every EOS token id to -INF while fewer than `minLength`
 * input ids are present; otherwise leaves the logits untouched.
 *
 * @inheritDoc
 */
public function __invoke(array $inputIds, Tensor $logits): Tensor
{
    if (count($inputIds) >= $this->minLength) {
        return $logits;
    }

    foreach ($this->eosTokenId as $eosId) {
        $logits->buffer()[$eosId] = -INF;
    }

    return $logits;
}
42 | }
--------------------------------------------------------------------------------
/src/Generation/LogitsProcessors/MinNewTokensLengthLogitsProcessor.php:
--------------------------------------------------------------------------------
1 | eosTokenId = is_array($eosTokenId) ? $eosTokenId : [$eosTokenId];
20 | }
21 |
/**
 * Sets the logit of every EOS token id to -INF while the number of input ids
 * beyond `promptLengthToSkip` is below `minNewTokens`.
 *
 * @inheritDoc
 */
public function __invoke(array $inputIds, Tensor $logits): Tensor
{
    $generatedCount = count($inputIds) - $this->promptLengthToSkip;

    if ($generatedCount >= $this->minNewTokens) {
        return $logits;
    }

    foreach ($this->eosTokenId as $id) {
        $logits->buffer()[$id] = -INF;
    }

    return $logits;
}
37 | }
--------------------------------------------------------------------------------
/src/Generation/LogitsProcessors/RepetitionPenaltyLogitsProcessor.php:
--------------------------------------------------------------------------------
1 | buffer()[$inputId] < 0) {
29 | $logits->buffer()[$inputId] *= $this->penalty;
30 | } else {
31 | $logits->buffer()[$inputId] /= $this->penalty;
32 | }
33 | }
34 | return $logits;
35 | }
36 | }
--------------------------------------------------------------------------------
/src/Generation/LogitsProcessors/SuppressTokensAtBeginLogitsProcessor.php:
--------------------------------------------------------------------------------
1 | beginIndex) {
30 | foreach ($this->beginSuppressTokens as $token) {
31 | $logits->buffer()[$token] = -INF;
32 | }
33 | }
34 |
35 | return $logits;
36 | }
37 | }
--------------------------------------------------------------------------------
/src/Generation/Samplers/BeamSearchSampler.php:
--------------------------------------------------------------------------------
1 | shape()[$logits->ndim() - 1];
23 |
24 | $k = $this->generationConfig->top_k > 0
25 | ? min($this->generationConfig->top_k, $vocabSize)
26 | : $vocabSize; // defaults to vocab size
27 |
28 | // Get logits of nth token
29 | $logs = $this->getLogits($logits, $index);
30 |
31 | // Get top k tokens
32 | [$topLogits, $topIndices] = $logs->topk($k);
33 |
34 | // Compute softmax over logits
35 | $probabilities = $topLogits->softmax()->toArray();
36 |
37 | $sampledResults = [];
38 | for ($i = 0; $i < $this->generationConfig->num_beams; $i++) {
39 | $sampledResults[] = [
40 | $topIndices[$i], // token id
41 | log($probabilities[$i]), // score
42 | ];
43 | }
44 |
45 | return $sampledResults;
46 | }
47 | }
--------------------------------------------------------------------------------
/src/Generation/Samplers/GreedySampler.php:
--------------------------------------------------------------------------------
1 | getLogits($logits, $index);
24 |
25 | // Note: score is meaningless in this context, since we are performing
26 | // greedy search (p = 1 => log(p) = 0)
27 | return [
28 | [$logs->argMax(), 0]
29 | ];
30 | }
31 | }
--------------------------------------------------------------------------------
/src/Generation/Samplers/MultinomialSampler.php:
--------------------------------------------------------------------------------
1 | shape()[$logits->ndim() - 1];
22 |
23 | $k = $this->generationConfig->top_k > 0
24 | ? min($this->generationConfig->top_k, $vocabSize)
25 | : $vocabSize; // defaults to vocab size
26 |
27 | // Get logits of nth token
28 | $logs = $this->getLogits($logits, $index);
29 |
30 | // Get top k tokens
31 | [$topLogits, $topIndices] = $logs->topk($k);
32 |
33 | // Compute softmax over logits
34 | $probabilities = $topLogits->softmax()->toArray();
35 |
36 | $sampledResults = [];
37 |
38 | for ($i = 0; $i < $this->generationConfig->num_beams; $i++) {
39 | $sampledIndex = $this->randomSelect($probabilities);
40 |
41 | $sampledResults[] = [
42 | $topIndices[$sampledIndex], // token id
43 | log($probabilities[$sampledIndex]), // score
44 | ];
45 | }
46 |
47 | return $sampledResults;
48 | }
49 | }
--------------------------------------------------------------------------------
/src/Generation/Streamers/StdOutStreamer.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\ASTForAudioClassification::class,
12 | 'wav2vec2' => \Codewithkyrian\Transformers\Models\Pretrained\Wav2Vec2ForSequenceClassification::class,
13 | ];
14 |
15 | const MODEL_CLASS_MAPPINGS = [
16 | self::MODEL_CLASS_MAPPING,
17 | ];
18 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForCTC.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\Wav2Vec2ForCTC::class,
12 | ];
13 |
14 | const MODEL_CLASS_MAPPINGS = [
15 | self::MODEL_CLASS_MAPPING,
16 | ];
17 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForCausalLM.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\GPT2LMHeadModel::class,
12 | 'gptj' => \Codewithkyrian\Transformers\Models\Pretrained\GPTJForCausalLM::class,
13 | 'gpt_bigcode' => \Codewithkyrian\Transformers\Models\Pretrained\GPTBigCodeForCausalLM::class,
14 | 'codegen' => \Codewithkyrian\Transformers\Models\Pretrained\CodeGenForCausalLM::class,
15 | 'llama' => \Codewithkyrian\Transformers\Models\Pretrained\LlamaForCausalLM::class,
16 | 'trocr' => \Codewithkyrian\Transformers\Models\Pretrained\TrOCRForCausalLM::class,
17 | 'qwen2' => \Codewithkyrian\Transformers\Models\Pretrained\Qwen2ForCausalLM::class
18 | ];
19 |
20 | const MODEL_CLASS_MAPPINGS = [
21 | self::MODEL_CLASS_MAPPING,
22 | ];
23 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForImageClassification.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\ViTForImageClassification::class,
12 | 'deit' => \Codewithkyrian\Transformers\Models\Pretrained\DeiTForImageClassification::class,
13 | ];
14 |
15 | const MODEL_CLASS_MAPPINGS = [
16 | self::MODEL_CLASS_MAPPING,
17 | ];
18 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForImageFeatureExtraction.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\CLIPVisionModelWithProjection::class,
12 | 'siglip' => \Codewithkyrian\Transformers\Models\Pretrained\SiglipVisionModel::class,
13 | ];
14 |
15 | const MODEL_CLASS_MAPPINGS = [
16 | self::MODEL_CLASS_MAPPING,
17 | AutoModel::ENCODER_ONLY_MODEL_MAPPING,
18 | AutoModel::DECODER_ONLY_MODEL_MAPPING,
19 | ];
20 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForImageToImage.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\Swin2SRForImageSuperResolution::class,
12 | ];
13 |
14 | const MODEL_CLASS_MAPPINGS = [
15 | self::MODEL_CLASS_MAPPING,
16 | ];
17 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForMaskedLM.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\AlbertForMaskedLM::class,
12 | "bert" => \Codewithkyrian\Transformers\Models\Pretrained\BertForMaskedLM::class,
13 | "deberta" => \Codewithkyrian\Transformers\Models\Pretrained\DebertaForMaskedLM::class,
14 | "deberta-v2" => \Codewithkyrian\Transformers\Models\Pretrained\DebertaV2ForMaskedLM::class,
15 | "distilbert" => \Codewithkyrian\Transformers\Models\Pretrained\DistilBertForMaskedLM::class,
16 | "mobilebert" => \Codewithkyrian\Transformers\Models\Pretrained\MobileBertForMaskedLM::class,
17 | "roberta" => \Codewithkyrian\Transformers\Models\Pretrained\RobertaForMaskedLM::class,
18 | "roformer" => \Codewithkyrian\Transformers\Models\Pretrained\RoFormerForMaskedLM::class,
19 | ];
20 |
21 | const MODEL_CLASS_MAPPINGS = [
22 | self::MODEL_CLASS_MAPPING,
23 | ];
24 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForObjectDetection.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\DetrForObjectDetection::class,
12 | 'yolos' => \Codewithkyrian\Transformers\Models\Pretrained\YolosForObjectDetection::class,
13 | ];
14 |
15 | const MODEL_CLASS_MAPPINGS = [
16 | self::MODEL_CLASS_MAPPING,
17 | ];
18 |
19 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForQuestionAnswering.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\AlbertForQuestionAnswering::class,
12 | 'bert' => \Codewithkyrian\Transformers\Models\Pretrained\BertForQuestionAnswering::class,
13 | 'deberta' => \Codewithkyrian\Transformers\Models\Pretrained\DebertaForQuestionAnswering::class,
14 | 'deberta-v2' => \Codewithkyrian\Transformers\Models\Pretrained\DebertaV2ForQuestionAnswering::class,
15 | 'distilbert' => \Codewithkyrian\Transformers\Models\Pretrained\DistilBertForQuestionAnswering::class,
16 | 'mobilebert' => \Codewithkyrian\Transformers\Models\Pretrained\MobileBertForQuestionAnswering::class,
17 | 'roberta' => \Codewithkyrian\Transformers\Models\Pretrained\RobertaForQuestionAnswering::class,
18 | 'roformer' => \Codewithkyrian\Transformers\Models\Pretrained\RoFormerForQuestionAnswering::class,
19 | ];
20 |
21 | const MODEL_CLASS_MAPPINGS = [
22 | self::MODEL_CLASS_MAPPING,
23 | ];
24 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForSeq2SeqLM.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\BartForConditionalGeneration::class,
11 | 't5' => \Codewithkyrian\Transformers\Models\Pretrained\T5ForConditionalGeneration::class,
12 | 'm2m_100' => \Codewithkyrian\Transformers\Models\Pretrained\M2M100ForConditionalGeneration::class,
13 | ];
14 |
15 | const MODEL_CLASS_MAPPINGS = [
16 | self::MODEL_CLASS_MAPPING,
17 | ];
18 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForSequenceClassification.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\AlbertForSequenceClassification::class,
13 | 'bert' => \Codewithkyrian\Transformers\Models\Pretrained\BertForSequenceClassification::class,
14 | 'bart' => \Codewithkyrian\Transformers\Models\Pretrained\BartForSequenceClassification::class,
15 | 'deberta' => \Codewithkyrian\Transformers\Models\Pretrained\DebertaForSequenceClassification::class,
16 | 'deberta-v2' => \Codewithkyrian\Transformers\Models\Pretrained\DebertaV2ForSequenceClassification::class,
17 | 'distilbert' => \Codewithkyrian\Transformers\Models\Pretrained\DistilBertForSequenceClassification::class,
18 | 'mobilebert' => \Codewithkyrian\Transformers\Models\Pretrained\MobileBertForSequenceClassification::class,
19 | 'roberta' => \Codewithkyrian\Transformers\Models\Pretrained\RobertaForSequenceClassification::class,
20 | 'roformer' => \Codewithkyrian\Transformers\Models\Pretrained\RoFormerForSequenceClassification::class,
21 | ];
22 |
23 | const MODEL_CLASS_MAPPINGS = [
24 | self::MODEL_CLASS_MAPPING,
25 | ];
26 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForSpeechSeq2Seq.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\WhisperForConditionalGeneration::class,
12 | ];
13 |
14 | const MODEL_CLASS_MAPPINGS = [
15 | self::MODEL_CLASS_MAPPING,
16 | ];
17 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForTokenClassification.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\BertForTokenClassification::class,
12 | "deberta" => \Codewithkyrian\Transformers\Models\Pretrained\DebertaForTokenClassification::class,
13 | "deberta-v2" => \Codewithkyrian\Transformers\Models\Pretrained\DebertaV2ForTokenClassification::class,
14 | "roberta" => \Codewithkyrian\Transformers\Models\Pretrained\RobertaForTokenClassification::class,
15 | 'roformer' => \Codewithkyrian\Transformers\Models\Pretrained\RoFormerForTokenClassification::class,
16 | ];
17 |
18 | const MODEL_CLASS_MAPPINGS = [
19 | self::MODEL_CLASS_MAPPING,
20 | ];
21 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForVision2Seq.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\VisionEncoderDecoderModel::class
12 | ];
13 |
14 | const MODEL_CLASS_MAPPINGS = [
15 | self::MODEL_CLASS_MAPPING,
16 | ];
17 | }
--------------------------------------------------------------------------------
/src/Models/Auto/AutoModelForZeroShotObjectDetection.php:
--------------------------------------------------------------------------------
1 | \Codewithkyrian\Transformers\Models\Pretrained\OwlViTForObjectDetection::class,
12 | 'owlv2' => \Codewithkyrian\Transformers\Models\Pretrained\Owlv2ForObjectDetection::class,
13 | ];
14 |
15 | const MODEL_CLASS_MAPPINGS = [
16 | self::MODEL_CLASS_MAPPING,
17 | ];
18 |
19 | }
--------------------------------------------------------------------------------
/src/Models/Output/BaseModelOutput.php:
--------------------------------------------------------------------------------
1 | numDecoderLayers = $this->config['decoder_layers'];
36 | $this->numDecoderHeads = $this->config['decoder_attention_heads'];
37 | $this->decoderDimKv = $this->config['d_model'] / $this->numDecoderHeads;
38 |
39 | $this->numEncoderLayers = $this->config['encoder_layers'];
40 | $this->numEncoderHeads = $this->config['encoder_attention_heads'];
41 | $this->encoderDimKv = $this->config['d_model'] / $this->numEncoderHeads;
42 | }
43 | }
--------------------------------------------------------------------------------
/src/Models/Pretrained/BartForSequenceClassification.php:
--------------------------------------------------------------------------------
1 | config['pad_token_id'] = $this->config['eos_token_id'];
29 | $this->config->padTokenId = $this->config['eos_token_id'];
30 |
31 | $this->numHeads = $this->config['n_head'];
32 | $this->numLayers = $this->config['n_layer'];
33 | $this->dimKv = $this->config['n_embd'] / $this->numHeads;
34 |
35 | }
36 | }
--------------------------------------------------------------------------------
/src/Models/Pretrained/DebertaForMaskedLM.php:
--------------------------------------------------------------------------------
1 | config['pad_token_id'] = $this->config['eos_token_id'];
30 | $this->config->padTokenId = $this->config['eos_token_id'];
31 |
32 | $this->numHeads = $this->config['n_head'];
33 | $this->numLayers = $this->config['n_layer'];
34 | $this->dimKv = $this->config['n_embd'] / $this->numHeads;
35 | }
36 | }
--------------------------------------------------------------------------------
/src/Models/Pretrained/GPTBigCodeForCausalLM.php:
--------------------------------------------------------------------------------
1 | config['pad_token_id'] = $this->config['eos_token_id'];
30 | $this->config->padTokenId = $this->config['eos_token_id'];
31 |
32 | $this->numHeads = $this->config['n_head'];
33 | $this->numLayers = $this->config['n_layer'];
34 | $this->dimKv = $this->config['n_embd'] / $this->numHeads;
35 | }
36 | }
--------------------------------------------------------------------------------
/src/Models/Pretrained/GPTJForCausalLM.php:
--------------------------------------------------------------------------------
1 | config['pad_token_id'] = $this->config['eos_token_id'];
29 | $this->config->padTokenId = $this->config['eos_token_id'];
30 |
31 | $this->numHeads = $this->config['n_head'];
32 | $this->numLayers = $this->config['n_layer'];
33 | $this->dimKv = $this->config['n_embd'] / $this->numHeads;
34 |
35 | }
36 | }
--------------------------------------------------------------------------------
/src/Models/Pretrained/LlamaForCausalLM.php:
--------------------------------------------------------------------------------
1 | config['pad_token_id'] = $this->config['eos_token_id'];
34 | $this->config->padTokenId = $this->config['eos_token_id'];
35 |
36 | $this->numHeads = $this->config['num_key_value_heads'] ?? $this->config['num_attention_heads'];
37 | $this->numLayers = $this->config['num_hidden_layers'];
38 | $this->dimKv = $this->config['hidden_size'] / $this->config['num_attention_heads'];
39 | }
40 | }
--------------------------------------------------------------------------------
/src/Models/Pretrained/M2M100ForConditionalGeneration.php:
--------------------------------------------------------------------------------
1 | numDecoderLayers = $this->config['decoder_layers'];
33 | $this->numDecoderHeads = $this->config['decoder_attention_heads'];
34 | $this->decoderDimKv = $this->config['d_model'] / $this->numDecoderHeads;
35 |
36 | $this->numEncoderLayers = $this->config['encoder_layers'];
37 | $this->numEncoderHeads = $this->config['encoder_attention_heads'];
38 | $this->encoderDimKv = $this->config['d_model'] / $this->numEncoderHeads;
39 | }
40 |
41 | }
--------------------------------------------------------------------------------
/src/Models/Pretrained/M2M100Model.php:
--------------------------------------------------------------------------------
1 | config['pad_token_id'] = $this->config['eos_token_id'];
33 | $this->config->padTokenId = $this->config['eos_token_id'];
34 |
35 | $this->numHeads = $this->config['num_key_value_heads'] ?? $this->config['num_attention_heads'];
36 | $this->numLayers = $this->config['num_hidden_layers'];
37 | $this->dimKv = $this->config['hidden_size'] / $this->config['num_attention_heads'];
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/Models/Pretrained/RoFormerForMaskedLM.php:
--------------------------------------------------------------------------------
1 | numDecoderLayers = $this->config['num_decoder_layers'];
36 | $this->numDecoderHeads = $this->config['num_heads'];
37 | $this->decoderDimKv = $this->config['d_kv'];
38 |
39 | $this->numEncoderLayers = $this->config['num_layers'];
40 | $this->numEncoderHeads = $this->config['num_heads'];
41 | $this->encoderDimKv = $this->config['d_kv'];
42 | }
43 | }
--------------------------------------------------------------------------------
/src/Models/Pretrained/T5Model.php:
--------------------------------------------------------------------------------
1 | numEncoderLayers = $this->numDecoderLayers = $this->config['decoder_layers'];
33 | $this->numEncoderHeads = $this->numDecoderHeads = $this->config['decoder_attention_heads'];
34 | $this->encoderDimKv = $this->decoderDimKv = $this->config['d_model'] / $this->numDecoderHeads;
35 | }
36 | }
--------------------------------------------------------------------------------
/src/Models/Pretrained/ViTForImageClassification.php:
--------------------------------------------------------------------------------
1 | toTensor(samplerate: 16000);
19 | * $inputs = $processor($audioTensor);
20 | *
21 | * // Run model with inputs
22 | * $model = AutoModel::from_pretrained('Xenova/mms-300m');
23 | * $output = $model($inputs);
24 | * // {
25 | * // last_hidden_state: Tensor {
26 | * // shape: [ 1, 1144, 1024 ],
27 | * // dtype: 'float32',
28 | * // buffer: (1171456) [ ... ],
29 | * // size: 1171456
30 | * // }
31 | * // }
32 | * ```
33 | */
34 | class Wav2Vec2Model extends Wav2Vec2PretrainedModel
35 | {
36 |
37 | }
--------------------------------------------------------------------------------
/src/Models/Pretrained/Wav2Vec2PretrainedModel.php:
--------------------------------------------------------------------------------
1 | new BertNormalizer($config),
25 | 'Precompiled' => new Precompiled($config),
26 | 'Sequence' => new NormalizerSequence($config),
27 | 'Replace' => new Replace($config),
28 | 'NFC' => new NFC($config),
29 | 'NFKC' => new NFKC($config),
30 | 'NFKD' => new NFKD($config),
31 | 'Strip' => new StripNormalizer($config),
32 | 'StripAccents' => new StripAccents($config),
33 | 'Lowercase' => new Lowercase($config),
34 | 'Prepend' => new Prepend($config),
35 | default => throw new \InvalidArgumentException('Unknown normalizer type: ' . $config['type'] ?? null),
36 | };
37 | }
38 |
39 | abstract public function normalize(string $text): string;
40 |
/**
 * Makes the normalizer callable: all call arguments are forwarded to normalize().
 *
 * @return string The value returned by normalize().
 */
public function __invoke(): string
{
    $arguments = func_get_args();

    return $this->normalize(...$arguments);
}
45 | }
--------------------------------------------------------------------------------
/src/Normalizers/NormalizerSequence.php:
--------------------------------------------------------------------------------
1 | normalizers = array_map(
23 | fn(array $config) => Normalizer::fromConfig($config),
24 | $config['normalizers']
25 | );
26 | }
27 |
28 | public function normalize(string $text): string
29 | {
30 |     // Run the text through every normalizer in order; each output becomes the next input.
31 |     foreach ($this->normalizers as $step) {
32 |         $text = $step->normalize($text);
33 |     }
34 |     return $text;
35 | }
36 | }
--------------------------------------------------------------------------------
/src/Normalizers/Prepend.php:
--------------------------------------------------------------------------------
1 | config['prepend'] . $text;
21 | }
22 | }
--------------------------------------------------------------------------------
/src/Normalizers/Replace.php:
--------------------------------------------------------------------------------
1 | config['pattern'] ?? null;
16 |
17 | if ($pattern === null) {
18 | return $text;
19 | }
20 |
21 | $regex = $pattern['Regex'] ?? null;
22 | $string = $pattern['String'] ?? null;
23 | $replacement = $this->config['content'] ?? '';
24 |
25 | if ($regex !== null) {
26 | return preg_replace("/{$regex}/u", $replacement, $text);
27 | }
28 |
29 | if ($string !== null) {
30 | return str_replace($string, $replacement, $text);
31 | }
32 |
33 | return $text;
34 | }
35 | }
--------------------------------------------------------------------------------
/src/Normalizers/StripAccents.php:
--------------------------------------------------------------------------------
1 | config['strip_left'] && $this->config['strip_right']) {
23 | // Fast path to avoid an extra trim call
24 | $text = trim($text);
25 | } else {
26 | if ($this->config['strip_left']) {
27 | $text = ltrim($text);
28 | }
29 | if ($this->config['strip_right']) {
30 | $text = rtrim($text);
31 | }
32 | }
33 | return $text;
34 | }
35 | }
--------------------------------------------------------------------------------
/src/Pipelines/SummarizationPipeline.php:
--------------------------------------------------------------------------------
1 | 'The Eiffel Tower is about the same height as an 81-storey building and the tallest structure in Paris. It is the second tallest free-standing structure in France after the Millau Viaduct.']
28 | */
29 | class SummarizationPipeline extends Text2TextGenerationPipeline // adds no methods; only overrides $key
30 | {
31 | protected string $key = 'summary_text'; // NOTE(review): presumably the result key the parent pipeline stores generated text under — confirm in Text2TextGenerationPipeline
32 | }
--------------------------------------------------------------------------------
/src/Pipelines/TranslationPipeline.php:
--------------------------------------------------------------------------------
1 | 'La vie est comme une boîte a chocolat.']
19 | * ```
20 | *
21 | * **Example:** Multilingual translation w/ `Xenova/m2m100_418M`.
22 | *
23 | * ```php
24 | * use function Codewithkyrian\Transformers\Pipelines\pipeline;
25 | *
26 | * $translator = pipeline('translation', model: 'Xenova/m2m100_418M');
27 | *
28 | * $output = $translator('生活就像一盒巧克力。', srcLang: 'zh', tgtLang: 'en'); // Chinese to English
29 | * // ['translation_text' => 'Life is like a box of chocolate.']
30 | * ```
31 | */
32 | class TranslationPipeline extends Text2TextGenerationPipeline // adds no methods; only overrides $key
33 | {
34 | protected string $key = 'translation_text'; // matches the 'translation_text' key shown in the example output above
35 | }
--------------------------------------------------------------------------------
/src/PostProcessors/ByteLevelPostProcessor.php:
--------------------------------------------------------------------------------
1 | new BertProcessing($config),
28 | 'ByteLevel' => new ByteLevelPostProcessor($config),
29 | 'TemplateProcessing' => new TemplateProcessing($config),
30 | 'RobertaProcessing' => new RobertaProcessing($config),
31 | 'Sequence' => new PostProcessorSequence($config),
32 | default => throw new \InvalidArgumentException("Unknown post-processor type {$config['type']}"),
33 | };
34 | }
35 |
36 | /**
37 | * @param array $tokens The input tokens to be post-processed.
38 | * @param array|null $tokenPair The input tokens for the second sequence in a pair.
39 | * @param bool $addSpecialTokens Whether to add the special tokens associated with the corresponding model.
40 | * @return PostProcessedOutput
41 | */
42 | abstract public function postProcess(array $tokens, ?array $tokenPair = null, bool $addSpecialTokens = true): PostProcessedOutput;
43 |
44 | public function __invoke(array $tokens, ...$args): PostProcessedOutput // lets a post-processor be called like a function
45 | {
46 | return $this->postProcess($tokens, ...$args); // any extra arguments are forwarded unchanged to postProcess()
47 | }
48 |
49 | }
--------------------------------------------------------------------------------
/src/PostProcessors/RobertaProcessing.php:
--------------------------------------------------------------------------------
1 | pattern = "/([$PUNCTUATION_REGEX])|\s+/u";
25 | }
26 |
27 | protected function preTokenizeText(array|string $text, array $options): array
28 | {
29 |     return preg_split($this->pattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) ?: []; // preg_split() returns false (never null) on failure, so `??` could not supply the fallback
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/PreTokenizers/DigitsPreTokenizer.php:
--------------------------------------------------------------------------------
1 | config['individual_digits'] ? '' : '+';
16 |
17 | $digitPattern = "[\D]+|\d$individualDigits";
18 |
19 | $this->pattern = "/$digitPattern/u";
20 | }
21 |
22 | public function preTokenizeText(string|array $text, array $options): array
23 | {
24 |     // PREG_SPLIT_NO_EMPTY is a preg_split() flag; here it was only read as PREG_PATTERN_ORDER (same value), which is already the default.
25 |     preg_match_all($this->pattern, $text, $matches);
26 |     return $matches[0] ?? []; // index 0 holds the full-pattern matches
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/src/PreTokenizers/PreTokenizerSequence.php:
--------------------------------------------------------------------------------
1 | preTokenizers = array_map(
18 | fn(array $config) => PreTokenizer::fromConfig($config),
19 | $config['pretokenizers']
20 | );
21 | }
22 |
23 | public function preTokenizeText(string|array $text, array $options): array
24 | {
25 |     $pieces = [$text]; // the chain starts from the input wrapped in a one-element list
26 |     foreach ($this->preTokenizers as $stage) {
27 |         $pieces = $stage->preTokenize($pieces, $options); // each stage consumes the previous stage's output
28 |     }
29 |     return $pieces;
30 | }
31 | }
--------------------------------------------------------------------------------
/src/PreTokenizers/PunctuationPreTokenizer.php:
--------------------------------------------------------------------------------
1 | pattern = "/[^{$PUNCTUATION_REGEX}]+|[{$PUNCTUATION_REGEX}]+/u";
15 | }
16 | public function preTokenizeText(string|array $text, array $options): array
17 | {
18 |     preg_match_all($this->pattern, $text, $groups); // collect every match of the configured pattern
19 |     return $groups[0]; // index 0 holds the full-pattern matches
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/PreTokenizers/ReplacePreTokenizer.php:
--------------------------------------------------------------------------------
1 | pattern = $config['pattern'] ?? null;
16 | $this->content = $config['content'];
17 | }
18 | public function preTokenizeText(string|array $text, array $options): array
19 | {
20 |     if ($this->pattern === null) {
21 |         return [$text]; // no pattern configured: pass the input through as a single token
22 |     }
23 |     // str_replace() yields a string for string input, which violated the declared array return type.
24 |     $replaced = str_replace($this->pattern, $this->content, $text);
25 |     return is_array($replaced) ? $replaced : [$replaced];
26 | }
27 | }
--------------------------------------------------------------------------------
/src/PreTokenizers/SplitPreTokenizer.php:
--------------------------------------------------------------------------------
1 | pattern = createPattern($config['pattern'], $config['invert']);
17 | }
18 |
19 |
20 | /**
21 |  * Splits text around matches of the configured pattern, or keeps only the matches when `invert` is set.
22 |  */
23 | public function preTokenizeText(string|array $text, array $options): array
24 | {
25 |     $regex = "/$this->pattern/u";
26 |
27 |     // Inverted mode: the matches themselves are the tokens.
28 |     if ($this->config['invert']) {
29 |         preg_match_all($regex, $text, $found);
30 |         return $found[0];
31 |     }
32 |
33 |     // Normal mode: emit each match plus the unmatched text between matches, in order.
34 |     preg_match_all($regex, $text, $found, PREG_OFFSET_CAPTURE);
35 |     $pieces = [];
36 |     $cursor = 0; // byte offset of the first character not yet emitted
37 |
38 |     foreach ($found[0] as [$matched, $start]) {
39 |         // Text sitting between the previous match and this one.
40 |         if ($start > $cursor) {
41 |             $pieces[] = substr($text, $cursor, $start - $cursor);
42 |         }
43 |         // Zero-length matches contribute no token.
44 |         if ($matched !== '') {
45 |             $pieces[] = $matched;
46 |         }
47 |         $cursor = $start + strlen($matched);
48 |     }
49 |
50 |     // Trailing text after the last match.
51 |     if ($cursor < strlen($text)) {
52 |         $pieces[] = substr($text, $cursor);
53 |     }
54 |     return $pieces;
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/PreTokenizers/WhitespacePreTokenizer.php:
--------------------------------------------------------------------------------
1 | ' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}";
10 | }
11 |
--------------------------------------------------------------------------------
/src/PreTrainedTokenizers/Grok1Tokenizer.php:
--------------------------------------------------------------------------------
1 | decoder = new VitsDecoder([]);
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/src/PreTrainedTokenizers/Wav2Vec2CTCTokenizer.php:
--------------------------------------------------------------------------------
1 | resolveDtype($array);
17 | $dtype = $this->defaultFloatType;
18 | }
19 | return new Tensor($array, $dtype, $shape);
20 | }
21 | }
--------------------------------------------------------------------------------
/src/Tensor/OpenBLASFactory.php:
--------------------------------------------------------------------------------
1 | $libFiles
23 | * @param array $lapackeLibs
24 | */
25 | public function __construct(
26 |     string $headerFile,
27 |     array $libFiles,
28 | )
29 | {
30 |     // Nothing to do when a library is already bound or the FFI extension is missing.
31 |     if (self::$ffi !== null || !extension_loaded('ffi')) {
32 |         return;
33 |     }
34 |
35 |     $code = file_get_contents($headerFile);
36 |     if ($code === false) {
37 |         // Unreadable header: stay unavailable instead of handing false to FFI::cdef().
38 |         return;
39 |     }
40 |
41 |     foreach ($libFiles as $filename) {
42 |         try {
43 |             self::$ffi = FFI::cdef($code, $filename);
44 |             break; // the first library that loads wins
45 |         } catch (FFIException) {
46 |             continue; // this candidate failed to load; try the next one
47 |         }
48 |     }
49 | }
50 |
51 | public function isAvailable(): bool
52 | {
53 | return self::$ffi !== null; // true once the constructor has bound a library to the shared static FFI handle
54 | }
55 |
56 | public function Blas(): Blas
57 | {
58 |     if (self::$ffi === null) { // strict comparison, matching isAvailable()
59 |         throw new RuntimeException('openblas library not loaded.'); // no library could be bound by the constructor
60 |     }
61 |     return new Blas(self::$ffi); // wraps the shared FFI handle
62 | }
63 |
64 | public function Lapack(): PhpLapack
65 | {
66 | return new PhpLapack(); // constructed without the FFI handle, so it never throws the "not loaded" error that Blas() does
67 | }
68 | }
--------------------------------------------------------------------------------
/src/Tensor/TensorBufferFactory.php:
--------------------------------------------------------------------------------
1 | bufferFactory = new TensorBufferFactory();
17 |
18 | $this->openblasFactory = new OpenBLASFactory(
19 | headerFile: Library::OpenBlas->header(basePath('includes')),
20 | libFiles: [Library::OpenBlas->library(basePath('libs'))],
21 | );
22 |
23 | $this->mathFactory = new MatlibFactory(
24 | libFiles: [Library::RindowMatlib->library(basePath('libs'))]
25 | );
26 | }
27 | }
--------------------------------------------------------------------------------
/src/Tokenizers/AddedToken.php:
--------------------------------------------------------------------------------
1 | 'undefined',
22 | self::LANCZOS => 'lanczos',
23 | self::BILINEAR => 'point',
24 | self::BICUBIC => 'cubic',
25 | self::BOX => 'box',
26 | self::HAMMING => 'hamming',
27 | };
28 | }
29 |
30 | }
--------------------------------------------------------------------------------
/tests/Expectations.php:
--------------------------------------------------------------------------------
1 | extend('toMatchArrayApproximately', function (array $expected, float $precision = 0.0001) {
7 | $actual = $this->value;
8 |
9 | expect($actual)
10 | ->toBeArray()
11 | ->and(count($actual))
12 | ->toBe(count($expected))
13 | ->and($actual)
14 | ->toHaveKeys(array_keys($expected));
15 |
16 | foreach ($expected as $key => $expectedValue) {
17 | $actualValue = $actual[$key];
18 |
19 | if (is_numeric($actualValue))
20 | {
21 | $message = "Failed asserting that $actualValue at key $key ≈ $expectedValue (±$precision)";
22 | expect($actualValue)
23 | ->toEqualWithDelta($expectedValue, $precision, $message);
24 | } else
25 | {
26 | $message = "Failed asserting that $actualValue at key $key ≈ $expectedValue";
27 | expect($actualValue)
28 | ->toEqual($expectedValue, $message);
29 | }
30 | }
31 |
32 | return $this;
33 | });
34 |
--------------------------------------------------------------------------------
/tests/Pest.php:
--------------------------------------------------------------------------------
1 | setCacheDir('tests/models')
13 | ->apply();
14 | });
15 |
16 | it('can create a pipeline for a task', function () {
17 | $extractor = pipeline('feature-extraction'); // only the task name is given, no model id
18 |
19 | expect($extractor)->toBeInstanceOf(FeatureExtractionPipeline::class);
20 | });
21 |
22 |
23 | it('can create a pipeline for a task with a model', function () {
24 | $extractor = pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2'); // task name plus an explicit model id
25 |
26 | expect($extractor)->toBeInstanceOf(FeatureExtractionPipeline::class);
27 | });
28 |
29 | it('throws an exception when creating a pipeline for an unsupported task', function () {
30 | pipeline('unsupported-task'); // the call itself is expected to throw; nothing is asserted on a return value
31 | })->throws(UnsupportedTaskException::class);
--------------------------------------------------------------------------------
/tests/tensors/TensorBufferTest.php:
--------------------------------------------------------------------------------
1 | tensorBuffer = new TensorBuffer(5, Tensor::float32);
10 | });
11 |
12 | it('throws an exception when accessing offset with invalid type', fn() => $this->tensorBuffer['offset']) // string key instead of an integer offset
13 | ->throws(TypeError::class);
14 |
15 | it('can create a zero-sized buffer', function () {
16 | $buffer = new TensorBuffer(0, Tensor::float32);
17 |
18 | expect($buffer->count())->toBe(0);
19 | });
20 |
21 | it('gets the correct value at the given offset using square brackets', function () {
22 | expect($this->tensorBuffer[0])->toBe(0.0) // first and last slots of the untouched 5-element buffer read back as 0.0
23 | ->and($this->tensorBuffer[4])->toBe(0.0);
24 | });
25 |
26 | it('sets the value at the given offset using square brackets', function () {
27 | $this->tensorBuffer[0] = 1.5;
28 | $this->tensorBuffer[4] = 2.5;
29 |
30 | expect($this->tensorBuffer[0])->toBe(1.5) // written values must read back exactly
31 | ->and($this->tensorBuffer[4])->toBe(2.5);
32 | });
33 |
34 | it('throws an exception when accessing out-of-range offset', fn() => $this->tensorBuffer[5]) // 5 is one past the last index of the 5-element buffer
35 | ->throws(OutOfRangeException::class);
36 |
37 | it('throws an exception when unsetting offset using square brackets', function () {
38 | unset($this->tensorBuffer[0]); // removing an element is expected to be rejected
39 | })->throws(LogicException::class);
40 |
--------------------------------------------------------------------------------
/tests/tokenizers/Datasets.php:
--------------------------------------------------------------------------------
1 | $tests) {
7 | foreach ($tests as $test) {
8 | $label = is_string($test['input']) ? $test['input'] : json_encode($test['input']);
9 | yield "$tokenizerId: $label" => fn () => [
10 | 'tokenizerId' => $tokenizerId,
11 | 'test' => $test
12 | ];
13 | }
14 | }
15 | });
16 |
17 | dataset('template-tokenization', function () {
18 |     // Only these keys of a test case are shown in its dataset label.
19 |     $labelKeys = array_flip(['add_generation_prompt', 'tokenize']);
20 |     $suites = json_decode(file_get_contents(__DIR__.'/dataset-templates.json'), true);
21 |
22 |     foreach ($suites as $tokenizerId => $cases) {
23 |         foreach ($cases as $case) {
24 |             $label = json_encode(array_intersect_key($case, $labelKeys));
25 |             $payload = ['tokenizerId' => $tokenizerId, 'test' => $case];
26 |             // Each entry is a lazy closure keyed by "<tokenizer id>: <label>".
27 |             yield "$tokenizerId: $label" => fn () => $payload;
28 |         }
29 |     }
30 | });
31 |
--------------------------------------------------------------------------------