├── .github
│   ├── dependabot.yml
│   └── workflows
│       └── docker-image.yml
├── .paperspace
│   └── app.yaml
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── app.py
├── approach.png
├── data
│   ├── README.md
│   └── meanwhile.json
├── inputs
│   ├── audio
│   │   └── .DS_Store
│   ├── saved
│   │   └── .DS_store
│   └── vids
│       └── .DS_store
├── language-breakdown.svg
├── model-card.md
├── notebooks
│   ├── LibriSpeech.ipynb
│   └── Multilingual_ASR.ipynb
├── outputs
│   └── saved
│       └── .DS_store
├── requirements.txt
├── results
│   └── subbed_vids
│       └── .DS_store
├── setup.py
├── spec.yaml
├── templates
│   ├── index.html
│   └── logo.png
├── tests
│   ├── jfk.flac
│   ├── test_audio.py
│   ├── test_normalizer.py
│   ├── test_tokenizer.py
│   └── test_transcribe.py
├── whisper-caption.ipynb
└── whisper
    ├── __init__.py
    ├── __main__.py
    ├── assets
    │   ├── gpt2
    │   │   ├── merges.txt
    │   │   ├── special_tokens_map.json
    │   │   ├── tokenizer_config.json
    │   │   └── vocab.json
    │   ├── mel_filters.npz
    │   └── multilingual
    │       ├── added_tokens.json
    │       ├── merges.txt
    │       ├── special_tokens_map.json
    │       ├── tokenizer_config.json
    │       └── vocab.json
    ├── audio.py
    ├── decoding.py
    ├── model.py
    ├── normalizers
    │   ├── __init__.py
    │   ├── basic.py
    │   ├── english.json
    │   └── english.py
    ├── tokenizer.py
    ├── transcribe.py
    └── utils.py

/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: "github-actions"
 4 |     directory: "/"
 5 |     schedule:
 6 |       interval: "daily"
 7 |   - package-ecosystem: "docker"
 8 |     directory: "/"
 9 |     schedule:
10 |       interval: "daily"
11 |   - package-ecosystem: "pip"
12 |     directory: "/api"
13 |     schedule:
14 |       interval: "daily"
15 |   - package-ecosystem: "npm"
16 |     directory: "/web"
17 |     schedule:
18 |       interval: "daily"
19 | 
--------------------------------------------------------------------------------
/.github/workflows/docker-image.yml:
--------------------------------------------------------------------------------
 1 | name: Create and publish a Docker image
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - "main"
 6 |     paths-ignore:
 7 |       - "**.md"
 8 |       - "docker-compose.yml"
 9 |       - "docker-compose.dev.yml"
10 |       - ".github/ISSUE_TEMPLATE/*.yml"
11 |       - ".github/dependabot.yml"
12 |   pull_request:
13 |     branches:
14 |       - "main"
15 |     paths-ignore:
16 |       - "**.md"
17 |       - "docker-compose.yml"
18 |       - "docker-compose.dev.yml"
19 |       - ".github/ISSUE_TEMPLATE/*.yml"
20 |       - ".github/dependabot.yml"
21 |   workflow_dispatch:
22 |   release:
23 |     types: [published, edited]
24 | 
25 | jobs:
26 |   build-and-publish-image:
27 |     runs-on: ubuntu-latest
28 |     environment: 05a580ce02c1b4b40b081cc9f6e028
29 |     steps:
30 |       - name: Checkout
31 |         uses: actions/checkout@v3
32 |       - name: Docker metadata
33 |         id: meta
34 |         uses: docker/metadata-action@v4
35 |         with:
36 |           images: |
37 |             ghcr.io/${{ github.repository }}
38 |           tags: |
39 |             type=raw,value=latest,enable={{is_default_branch}}
40 |             type=ref,event=branch
41 |             type=ref,event=pr
42 |             type=semver,pattern={{version}}
43 |             type=semver,pattern={{major}}
44 |             type=semver,pattern={{major}}.{{minor}}
45 | 
46 |       - name: Set up QEMU
47 |         uses: docker/setup-qemu-action@v2
48 | 
49 |       - name: Set up Docker Buildx
50 |         uses: docker/setup-buildx-action@v2
51 | 
52 |       - name: Login to GitHub Container Registry
53 |         if: github.event_name != 'pull_request'
54 |         uses: docker/login-action@v2
55 |         with:
56 |           registry: ghcr.io
57 |           username: ${{ github.repository_owner }}
58 |           password: ${{ secrets.GITHUB_TOKEN }}
59 | 
60 |       - name: Build and Publish Docker Image
61 |         uses: docker/build-push-action@v4
62 |         id: build-push
63 |         with:
64 |           context: .
65 |           push: ${{ github.event_name != 'pull_request' }}
66 |           cache-from: type=gha
67 |           cache-to: type=gha,mode=max
68 |           platforms: linux/amd64,linux/arm64
69 |           tags: ${{ steps.meta.outputs.tags }}
70 |           labels: ${{ steps.meta.outputs.labels }}
71 | 
72 |       - name: Set container SHA
73 |         id: set-container-sha
74 |         run: |
75 |           echo "CONTAINER_SHA=${{ steps.build-push.outputs.digest }}" >> "$GITHUB_OUTPUT"
76 |           echo "${{ steps.build-push.outputs.digest }}"
77 | 
78 |       - uses: paperspace/deploy-action@v1.2
79 |         name: Deploy to Paperspace
80 |         id: deploy
81 |         env:
82 |           PAPERSPACE_API_KEY: ${{ secrets.PAPERSPACE_API_KEY }}
83 |           CONTAINER_SHA: ${{ steps.set-container-sha.outputs.CONTAINER_SHA }}
84 |         with:
85 |           projectId: pioiimjvdsx
86 |           configPath: /../.paperspace/app.yaml
87 |           image: ghcr.io/${{ github.repository }}:latest@${{ steps.build-push.outputs.digest }}
88 | 
--------------------------------------------------------------------------------
/.paperspace/app.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: latest
 2 | enabled: true
 3 | name: gradient-ai/Whisper-AutoCaption
 4 | image: ghcr.io/gradient-ai/Whisper-AutoCaption:latest
 5 | port: 8008
 6 | healthChecks:
 7 |   readiness:
 8 |     path: /
 9 | resources:
10 |   replicas: 1
11 |   instanceType: P6000
12 |   autoscaling:
13 |     enabled: true
14 |     maxReplicas: 2
15 |     metrics:
16 |       - metric: requestDuration
17 |         summary: average
18 |         value: 0.15
19 |       - metric: cpu
20 |         summary: average
21 |         value: 30
22 |       - metric: memory
23 |         summary: average
24 |         value: 45
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.8-slim-buster
 2 | 
 3 | RUN apt-get update \
 4 |     && apt-get install -y wget \
 5 |     && rm -rf /var/lib/apt/lists/*
 6 | RUN apt-get update && apt-get install -y git
 7 | RUN pip install flask
 8 | RUN pip install Werkzeug
 9 | RUN pip install numpy
10 | RUN pip install torch==1.10.0 -f https://download.pytorch.org/whl/torch_stable.html
11 | RUN pip install tqdm
12 | RUN pip install more-itertools
13 | RUN pip install "transformers>=4.19.0"
14 | RUN pip install opencv-python-headless
15 | RUN pip install ffmpeg-python
16 | RUN apt-get install -y ffmpeg
17 | RUN pip install git+https://github.com/openai/whisper.git
18 | RUN pip install pandas
19 | RUN pip install moviepy --upgrade
20 | RUN apt-get install -y imagemagick
21 | RUN sed -i '88d' /etc/ImageMagick-6/policy.xml  # drop the ImageMagick policy line that blocks MoviePy's TextClip rendering
22 | RUN git clone https://github.com/gradient-ai/Whisper-AutoCaption
23 | WORKDIR Whisper-AutoCaption/
24 | RUN pip install -r requirements.txt
25 | RUN pip install -U yt-dlp
26 | RUN find .paperspace/ -type f > listOfFiles.list
27 | 
28 | EXPOSE 5000
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 OpenAI
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include whisper/assets/*
 2 | include whisper/assets/gpt2/*
 3 | include whisper/assets/multilingual/*
 4 | include whisper/normalizers/english.json
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Whisper Auto Caption
  2 | 
  3 | This repo shows how to translate and automatically caption videos using Whisper and MoviePy.
  4 | 
  5 | Launch this in Paperspace Gradient by clicking the link below.
  6 | 
  7 | ## Launch Notebook
  8 | 
  9 | [Run on Gradient](https://console.paperspace.com/github/gradient-ai/Whisper-AutoCaption/blob/master/whisper-caption.ipynb?machine=Free-GPU)
 10 | 
 11 | ---
 12 | 
 13 | # The `subtitle_video` function
 14 | 
 15 | The `subtitle_video` function can be accessed through the whisper-caption.ipynb notebook. This function uses Whisper and MoviePy to take in a video, extract its audio, convert its speech into text captions, and then add those captions back onto the original video at the correct timestamps.
 16 | 
 17 | `subtitle_video` takes the following parameters (a usage sketch appears just before the deployment spec below):
 18 | 
 19 | ```
 20 | download: bool, whether to download a YouTube video
 21 | url: str, the URL of the YouTube video to download if download is True
 22 | aud_opts: dict, youtube-dl options for the audio file
 23 | vid_opts: dict, youtube-dl options for the video file
 24 | model_type: str, which pretrained model to download. Options are:
 25 |     ['tiny', 'small', 'base', 'medium', 'large', 'tiny.en', 'small.en', 'base.en', 'medium.en']
 26 |     More details about model types can be found in the table in the original repo:
 27 |     https://github.com/openai/whisper#available-models-and-languages
 28 | name: str, name of the directory to store files in within the experiments folder
 29 | audio_file: str, path to the extracted audio file for Whisper
 30 | input_file: str, path to the video file for MoviePy to caption
 31 | output: str, destination of the final output video file
 32 | uploaded_vid: str, path to the uploaded video file if download is False
 33 | ```
 34 | 
 35 | ---
 36 | 
 37 | # The Whisper AutoCaption Flask application
 38 | 
 39 | To deploy Whisper AutoCaption as a Flask web application, go to Gradient Deployments and create a new deployment. Fill in the values shown in the spec below and create the deployment. Once it is running, click the API endpoint URL on the deployment's details page.
 40 | 
 41 | From there, you can directly input any video from your local computer or a YouTube URL.
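
For reference, calling `subtitle_video` directly from the notebook (rather than through the web app) looks roughly like this; every argument value below is illustrative rather than a default enforced by the repo:

```python
# Sketch only: assumes the notebook cell defining subtitle_video has been run.
subtitle_video(
    download=True,                     # pull the video from YouTube
    url="https://www.youtube.com/watch?v=...",
    aud_opts={'format': 'mp3/bestaudio/best', 'outtmpl': 'inputs/audio/audio.mp3'},
    vid_opts={'format': 'mp4/bestvideo/best', 'outtmpl': 'inputs/vids/video.mp4'},
    model_type='medium',               # any name from the model table further below
    name='demo',                       # experiment directory name
    audio_file='inputs/audio/audio.mp3',
    input_file='inputs/vids/video.mp4',
    output='results/subbed_vids/video.mp4',
    uploaded_vid=None,                 # only used when download=False
)
```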
 42 | The core of the deployment spec looks like this:
 43 | ```
 44 | image: paperspace/whisper-autocaption:v1.01
 45 | port: 5000
 46 | resources:
 47 |   replicas: 1
 48 |   instanceType: RTX4000
 49 | ```
 50 | 
 51 | The full spec is as follows:
 52 | 
 53 | ```
 54 | enabled: true
 55 | image: paperspace/whisper-autocaption:v1.01
 56 | port: 5000
 57 | resources:
 58 |   replicas: 1
 59 |   instanceType: RTX4000
 60 |   autoscaling:
 61 |     enabled: true
 62 |     maxReplicas: 5
 63 |     metrics:
 64 |       - metric: requestDuration
 65 |         summary: average
 66 |         value: 0.15
 67 |       - metric: cpu
 68 |         summary: average
 69 |         value: 30
 70 |       - metric: memory
 71 |         summary: average
 72 |         value: 45
 73 | ```
 74 | 
 75 | ---
 76 | 
 77 | Future plans:
 78 | 
 79 | - API version
 80 | 
 81 | ---
 82 | 
 83 | [[Blog]](https://openai.com/blog/whisper)
 84 | [[Paper]](https://cdn.openai.com/papers/whisper.pdf)
 85 | [[Model card]](model-card.md)
 86 | 
 87 | Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.
 88 | 
 89 | ## Approach
 90 | 
 91 | ![Approach](approach.png)
 92 | 
 93 | A Transformer sequence-to-sequence model is trained on various speech processing tasks, including multilingual speech recognition, speech translation, spoken language identification, and voice activity detection. All of these tasks are jointly represented as a sequence of tokens to be predicted by the decoder, allowing a single model to replace many different stages of a traditional speech processing pipeline. The multitask training format uses a set of special tokens that serve as task specifiers or classification targets.
 94 | 
 95 | ## Setup
 96 | 
 97 | We used Python 3.9.9 and [PyTorch](https://pytorch.org/) 1.10.1 to train and test our models, but the codebase is expected to be compatible with Python 3.7 or later and recent PyTorch versions. The codebase also depends on a few Python packages, most notably [HuggingFace Transformers](https://huggingface.co/docs/transformers/index) for their fast tokenizer implementation and [ffmpeg-python](https://github.com/kkroening/ffmpeg-python) for reading audio files. The following command will pull and install the latest commit from this repository, along with its Python dependencies:
 98 | 
 99 |     pip install git+https://github.com/openai/whisper.git
100 | 
101 | It also requires the command-line tool [`ffmpeg`](https://ffmpeg.org/) to be installed on your system, which is available from most package managers:
102 | 
103 | ```bash
104 | # on Ubuntu or Debian
105 | sudo apt update && sudo apt install ffmpeg
106 | 
107 | # on Arch Linux
108 | sudo pacman -S ffmpeg
109 | 
110 | # on MacOS using Homebrew (https://brew.sh/)
111 | brew install ffmpeg
112 | 
113 | # on Windows using Chocolatey (https://chocolatey.org/)
114 | choco install ffmpeg
115 | 
116 | # on Windows using Scoop (https://scoop.sh/)
117 | scoop install ffmpeg
118 | ```
119 | 
120 | You may need [`rust`](https://rust-lang.org) installed as well, in case [tokenizers](https://pypi.org/project/tokenizers/) does not provide a pre-built wheel for your platform. If you see installation errors during the `pip install` command above, please follow the [Getting started page](https://www.rust-lang.org/learn/get-started) to install the Rust development environment. Additionally, you may need to configure the `PATH` environment variable, e.g. `export PATH="$HOME/.cargo/bin:$PATH"`. If the installation fails with `No module named 'setuptools_rust'`, you need to install `setuptools_rust`, e.g. by running:
121 | 
122 | ```bash
123 | pip install setuptools-rust
124 | ```
125 | 
126 | ## Available models and languages
127 | 
128 | There are five model sizes, four with English-only versions, offering speed and accuracy tradeoffs. Below are the names of the available models and their approximate memory requirements and relative speed.
129 | 
130 | |  Size  | Parameters | English-only model | Multilingual model | Required VRAM | Relative speed |
131 | | :----: | :--------: | :----------------: | :----------------: | :-----------: | :------------: |
132 | |  tiny  |    39 M    |     `tiny.en`      |       `tiny`       |     ~1 GB     |      ~32x      |
133 | |  base  |    74 M    |     `base.en`      |       `base`       |     ~1 GB     |      ~16x      |
134 | | small  |   244 M    |     `small.en`     |      `small`       |     ~2 GB     |      ~6x       |
135 | | medium |   769 M    |    `medium.en`     |      `medium`      |     ~5 GB     |      ~2x       |
136 | | large  |   1550 M   |        N/A         |      `large`       |    ~10 GB     |       1x       |
137 | 
138 | For English-only applications, the `.en` models tend to perform better, especially for the `tiny.en` and `base.en` models. We observed that the difference becomes less significant for the `small.en` and `medium.en` models.
139 | 
140 | Whisper's performance varies widely depending on the language. The figure below shows a WER breakdown by language on the Fleurs dataset, using the `large` model. More WER and BLEU scores corresponding to the other models and datasets can be found in Appendix D of [the paper](https://cdn.openai.com/papers/whisper.pdf).
141 | 
142 | ![WER breakdown by language](language-breakdown.svg)
143 | 
144 | ## Command-line usage
145 | 
146 | The following command will transcribe speech in audio files, using the `medium` model:
147 | 
148 |     whisper audio.flac audio.mp3 audio.wav --model medium
149 | 
150 | The default setting (which selects the `small` model) works well for transcribing English. To transcribe an audio file containing non-English speech, you can specify the language using the `--language` option:
151 | 
152 |     whisper japanese.wav --language Japanese
153 | 
154 | Adding `--task translate` will translate the speech into English:
155 | 
156 |     whisper japanese.wav --language Japanese --task translate
157 | 
158 | Run the following to view all available options:
159 | 
160 |     whisper --help
161 | 
162 | See [tokenizer.py](whisper/tokenizer.py) for the list of all available languages.
163 | 
164 | ## Python usage
165 | 
166 | Transcription can also be performed within Python:
167 | 
168 | ```python
169 | import whisper
170 | 
171 | model = whisper.load_model("base")
172 | result = model.transcribe("audio.mp3")
173 | print(result["text"])
174 | ```
175 | 
176 | Internally, the `transcribe()` method reads the entire file and processes the audio with a sliding 30-second window, performing autoregressive sequence-to-sequence predictions on each window.
177 | 
178 | Below is an example usage of `whisper.detect_language()` and `whisper.decode()`, which provide lower-level access to the model.
179 | 
180 | ```python
181 | import whisper
182 | 
183 | model = whisper.load_model("base")
184 | 
185 | # load audio and pad/trim it to fit 30 seconds
186 | audio = whisper.load_audio("audio.mp3")
187 | audio = whisper.pad_or_trim(audio)
188 | 
189 | # make log-Mel spectrogram and move to the same device as the model
190 | mel = whisper.log_mel_spectrogram(audio).to(model.device)
191 | 
192 | # detect the spoken language
193 | _, probs = model.detect_language(mel)
194 | print(f"Detected language: {max(probs, key=probs.get)}")
195 | 
196 | # decode the audio
197 | options = whisper.DecodingOptions()
198 | result = whisper.decode(model, mel, options)
199 | 
200 | # print the recognized text
201 | print(result.text)
202 | ```
203 | 
204 | ## More examples
205 | 
206 | Please use the [🙌 Show and tell](https://github.com/openai/whisper/discussions/categories/show-and-tell) category in Discussions for sharing more example usages of Whisper and third-party extensions such as web demos, integrations with other tools, ports for different platforms, etc.
207 | 
208 | ## License
209 | 
210 | The code and the model weights of Whisper are released under the MIT License. See [LICENSE](LICENSE) for further details.
211 | 
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | from __future__ import unicode_literals
 2 | from cgitb import text
 3 | from yt_dlp import YoutubeDL
 4 | import yt_dlp
 5 | import whisper
 6 | import pandas as pd
 7 | from moviepy.editor import VideoFileClip
 8 | import moviepy.editor as mp
 9 | from moviepy.editor import *
10 | from moviepy.video.tools.subtitles import SubtitlesClip
11 | import os
12 | 
13 | import cv2
14 | from os import listdir
15 | from os.path import isfile, join
16 | from werkzeug.utils import secure_filename
17 | import shutil
18 | import argparse
19 | import torch
20 | import torchvision.transforms as transforms
21 | from PIL import Image
22 | from flask import Flask, jsonify, request, render_template, redirect, url_for, send_from_directory
23 | 
24 | import sys
25 | 
26 | UPLOAD_FOLDER = 'inputs/vids'
27 | OUTPUT_FOLDER = 'results/subbed_vids'
28 | 
29 | ALLOWED_EXTENSIONS = {'mp4', 'mov', 'webm', 'ts', 'avi', 'y4m', 'mkv'}
30 | 
31 | app = Flask(__name__, static_folder='results')
32 | app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
33 | app.config['OUTPUT_FOLDER'] = OUTPUT_FOLDER
34 | 
35 | 
36 | 
37 | @app.route("/", methods=['GET', 'POST'])
38 | def index():
39 |     return redirect(url_for('upload_file'))
40 | 
41 | def allowed_file(filename):
42 |     return '.' in filename and \
43 |         filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
44 | 
45 | @app.route("/upload", methods=['GET', 'POST'])
46 | def upload_file():
47 |     # print(request.args.get('key', ''))
48 |     source = 'inputs/vids'
49 |     audio = 'inputs/audio'
50 |     out = 'results/subbed_vids/'
51 |     opts_aud = {'format': 'mp3/bestaudio/best', 'keepvideo': True, 'outtmpl': 'inputs/audio/audio.mp3'}
52 |     vid_opts = {'format': 'mp4/bestvideo/best', 'outtmpl': f'{source}/video.mp4'}
53 |     for f in os.listdir(source):
54 |         os.remove(os.path.join(source, f))
55 |     for f in os.listdir(audio):
56 |         os.remove(os.path.join(audio, f))
57 |     for f in os.listdir(out):
58 |         os.remove(os.path.join(out, f))
59 |     try:  # if no YouTube URL was submitted, the download fails and we fall through to the file upload path
60 |         text1 = request.form.values()
61 |         text1 = list(text1)
62 |         with YoutubeDL(vid_opts) as ydl:
63 |             ydl.download(text1)
64 |         with YoutubeDL(opts_aud) as ydl:
65 |             ydl.download(text1)
66 |     except Exception:
67 |         pass
68 | 
69 |     if request.method == 'POST':
70 |         # check if the post request has the file part
71 |         if 'file' not in request.files:
72 |             if 'video.mp4' in os.listdir('inputs/vids/'):
73 |                 return redirect(url_for('main', name='inputs/vids/video.mp4'))
74 |             print('No file part')
75 |             return redirect(request.url)
76 |         file = request.files['file']
77 | 
78 |         # If the user does not select a file, the browser submits an
79 |         # empty file without a filename.
80 |         if file.filename == '':
81 |             print('No selected file')
82 |             return redirect(request.url)
83 |         if file and allowed_file(file.filename):
84 |             filename = secure_filename(file.filename)
85 |             file.save(os.path.join(app.config['UPLOAD_FOLDER'], 'video.mp4'))
86 |             return redirect(url_for('main', name='video.mp4'))
87 |     return '''
88 |     <!doctype html>
89 |     <html>
90 |     <body>
91 |     <form method=post enctype=multipart/form-data>
92 |       <input type=text name=link placeholder="Paste a YouTube URL here">
93 |       <input type=file name=file>
94 |       <input type=submit value=Upload>
95 |     </form>
96 |     </body>
97 |     </html>
98 |     '''
99 | 
--------------------------------------------------------------------------------
/notebooks/LibriSpeech.ipynb:
--------------------------------------------------------------------------------
239 | "      <th></th>\n",
240 | "      <th>hypothesis</th>\n",
241 | "      <th>reference</th>\n",
242 | "    </tr>\n",
---|---|---|
0 | \n", 247 | "He hoped there would be stew for dinner, turni... | \n", 248 | "HE HOPED THERE WOULD BE STEW FOR DINNER TURNIP... | \n", 249 | "
1 | \n", 252 | "Stuffered into you, his belly counseled him. | \n", 253 | "STUFF IT INTO YOU HIS BELLY COUNSELLED HIM | \n", 254 | "
2 | \n", 257 | "After early nightfall the yellow lamps would l... | \n", 258 | "AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD L... | \n", 259 | "
3 | \n", 262 | "Hello Bertie, any good in your mind? | \n", 263 | "HELLO BERTIE ANY GOOD IN YOUR MIND | \n", 264 | "
4 | \n", 267 | "Number 10. Fresh Nelly is waiting on you. Good... | \n", 268 | "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD ... | \n", 269 | "
... | \n", 272 | "... | \n", 273 | "... | \n", 274 | "
2615 | \n", 277 | "Oh, to shoot my soul's full meaning into futur... | \n", 278 | "OH TO SHOOT MY SOUL'S FULL MEANING INTO FUTURE... | \n", 279 | "
2616 | \n", 282 | "Then I, long tried by natural ills, received t... | \n", 283 | "THEN I LONG TRIED BY NATURAL ILLS RECEIVED THE... | \n", 284 | "
2617 | \n", 287 | "I love thee freely as men strive for right. I ... | \n", 288 | "I LOVE THEE FREELY AS MEN STRIVE FOR RIGHT I L... | \n", 289 | "
2618 | \n", 292 | "I love thee with the passion put to use, in my... | \n", 293 | "I LOVE THEE WITH THE PASSION PUT TO USE IN MY ... | \n", 294 | "
2619 | \n", 297 | "I love thee with the love I seemed to lose wit... | \n", 298 | "I LOVE THEE WITH A LOVE I SEEMED TO LOSE WITH ... | \n", 299 | "
2620 rows × 2 columns
\n", 303 | "\n", 403 | " | hypothesis | \n", 404 | "reference | \n", 405 | "hypothesis_clean | \n", 406 | "reference_clean | \n", 407 | "
---|---|---|---|---|
0 | \n", 412 | "He hoped there would be stew for dinner, turni... | \n", 413 | "HE HOPED THERE WOULD BE STEW FOR DINNER TURNIP... | \n", 414 | "he hoped there would be stew for dinner turnip... | \n", 415 | "he hoped there would be stew for dinner turnip... | \n", 416 | "
1 | \n", 419 | "Stuffered into you, his belly counseled him. | \n", 420 | "STUFF IT INTO YOU HIS BELLY COUNSELLED HIM | \n", 421 | "stuffered into you his belly counseled him | \n", 422 | "stuff it into you his belly counseled him | \n", 423 | "
2 | \n", 426 | "After early nightfall the yellow lamps would l... | \n", 427 | "AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD L... | \n", 428 | "after early nightfall the yellow lamps would l... | \n", 429 | "after early nightfall the yellow lamps would l... | \n", 430 | "
3 | \n", 433 | "Hello Bertie, any good in your mind? | \n", 434 | "HELLO BERTIE ANY GOOD IN YOUR MIND | \n", 435 | "hello bertie any good in your mind | \n", 436 | "hello bertie any good in your mind | \n", 437 | "
4 | \n", 440 | "Number 10. Fresh Nelly is waiting on you. Good... | \n", 441 | "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD ... | \n", 442 | "number 10 fresh nelly is waiting on you good n... | \n", 443 | "number 10 fresh nelly is waiting on you good n... | \n", 444 | "
... | \n", 447 | "... | \n", 448 | "... | \n", 449 | "... | \n", 450 | "... | \n", 451 | "
2615 | \n", 454 | "Oh, to shoot my soul's full meaning into futur... | \n", 455 | "OH TO SHOOT MY SOUL'S FULL MEANING INTO FUTURE... | \n", 456 | "0 to shoot my soul is full meaning into future... | \n", 457 | "0 to shoot my soul is full meaning into future... | \n", 458 | "
2616 | \n", 461 | "Then I, long tried by natural ills, received t... | \n", 462 | "THEN I LONG TRIED BY NATURAL ILLS RECEIVED THE... | \n", 463 | "then i long tried by natural ills received the... | \n", 464 | "then i long tried by natural ills received the... | \n", 465 | "
2617 | \n", 468 | "I love thee freely as men strive for right. I ... | \n", 469 | "I LOVE THEE FREELY AS MEN STRIVE FOR RIGHT I L... | \n", 470 | "i love thee freely as men strive for right i l... | \n", 471 | "i love thee freely as men strive for right i l... | \n", 472 | "
2618 | \n", 475 | "I love thee with the passion put to use, in my... | \n", 476 | "I LOVE THEE WITH THE PASSION PUT TO USE IN MY ... | \n", 477 | "i love thee with the passion put to use in my ... | \n", 478 | "i love thee with the passion put to use in my ... | \n", 479 | "
2619 | \n", 482 | "I love thee with the love I seemed to lose wit... | \n", 483 | "I LOVE THEE WITH A LOVE I SEEMED TO LOSE WITH ... | \n", 484 | "i love thee with the love i seemed to lose wit... | \n", 485 | "i love thee with a love i seemed to lose with ... | \n", 486 | "
2620 rows × 4 columns
\n", 490 | "