├── .dockerignore ├── .env.example ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── docker-compose.yaml ├── docker_build.sh ├── go.mod ├── go.sum └── main.go /.dockerignore: -------------------------------------------------------------------------------- 1 | main 2 | 3 | .env 4 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | PORT=4040 2 | CORS_ALLOW_ORIGINS=* 3 | API_KEY=429683C4C977415CAAFCCE10F7D57E11 4 | ENABLE_TRANSCRIPTION=true 5 | TRANSCRIPTION_PROVIDER=openai # or groq 6 | OPENAI_API_KEY=your_openai_key_here 7 | GROQ_API_KEY=your_groq_key_here 8 | TRANSCRIPTION_LANGUAGE=en # Default transcription language (optional) 9 | 10 | # S3 Storage Settings 11 | ENABLE_S3_STORAGE=true 12 | S3_ENDPOINT=play.min.io 13 | S3_ACCESS_KEY=your_access_key_here 14 | S3_SECRET_KEY=your_secret_key_here 15 | S3_BUCKET_NAME=audio-files 16 | S3_REGION=us-east-1 17 | S3_USE_SSL=true 18 | S3_URL_EXPIRATION=24h # Go duration format: 30m, 1h, 24h, 168h (a "d" day unit is not supported) 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | main 2 | 3 | .env 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Usar uma imagem base do Go 2 | FROM golang:1.22-alpine 3 | 4 | # Instalar ffmpeg 5 | RUN apk update && apk add --no-cache ffmpeg 6 | 7 | # Definir o diretório de trabalho no container 8 | WORKDIR /app 9 | 10 | # Copiar os arquivos go.mod e go.sum para o diretório de trabalho 11 | COPY go.mod go.sum ./ 12 | 13 | # Baixar as dependências 14 | RUN go mod download 15 | 16 | # Copiar o código-fonte para o container 17 | COPY . . 18 | 19 | # Compilar o binário do Go 20 | RUN go build -o main . 
21 | 22 | # Definir a variável de ambiente para a porta 23 | ENV PORT=4040 24 | 25 | # Expor a porta definida 26 | EXPOSE ${PORT} 27 | 28 | # Comando para rodar o servidor 29 | CMD ["./main"] 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Evolution API 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Evolution Audio Converter 2 | 3 | This project is a microservice in Go that processes audio files, converts them to **opus** or **mp3** format, and returns both the duration of the audio and the converted file (as base64 or S3 URL). 
The service accepts audio files sent as **form-data**, **base64**, or **URL**. 4 | 5 | ## Requirements 6 | 7 | Before starting, you'll need to have the following installed: 8 | 9 | - [Go](https://golang.org/doc/install) (version 1.21 or higher) 10 | - [Docker](https://docs.docker.com/get-docker/) (to run the project in a container) 11 | - [FFmpeg](https://ffmpeg.org/download.html) (for audio processing) 12 | 13 | ## Installation 14 | 15 | ### Clone the Repository 16 | 17 | Clone this repository to your local machine: 18 | 19 | ```bash 20 | git clone https://github.com/EvolutionAPI/evolution-audio-converter.git 21 | cd evolution-audio-converter 22 | ``` 23 | 24 | ### Install Dependencies 25 | 26 | Install the project dependencies: 27 | 28 | ```bash 29 | go mod tidy 30 | ``` 31 | 32 | ### Install FFmpeg 33 | 34 | The service depends on **FFmpeg** to convert the audio. Make sure FFmpeg is installed on your system. 35 | 36 | - On Ubuntu: 37 | 38 | ```bash 39 | sudo apt update 40 | sudo apt install ffmpeg 41 | ``` 42 | 43 | - On macOS (via Homebrew): 44 | 45 | ```bash 46 | brew install ffmpeg 47 | ``` 48 | 49 | - On Windows, download FFmpeg [here](https://ffmpeg.org/download.html) and add it to your system `PATH`. 50 | 51 | ### Configuration 52 | 53 | Create a `.env` file in the project's root directory. 
Here are the available configuration options: 54 | 55 | #### Basic Configuration 56 | 57 | ```env 58 | PORT=4040 59 | API_KEY=your_secret_api_key_here 60 | ``` 61 | 62 | #### Transcription Configuration 63 | 64 | ```env 65 | ENABLE_TRANSCRIPTION=true 66 | TRANSCRIPTION_PROVIDER=openai # or groq 67 | OPENAI_API_KEY=your_openai_key_here 68 | GROQ_API_KEY=your_groq_key_here 69 | TRANSCRIPTION_LANGUAGE=en # Default transcription language (optional) 70 | ``` 71 | 72 | #### Storage Configuration 73 | 74 | ```env 75 | ENABLE_S3_STORAGE=true 76 | S3_ENDPOINT=play.min.io 77 | S3_ACCESS_KEY=your_access_key_here 78 | S3_SECRET_KEY=your_secret_key_here 79 | S3_BUCKET_NAME=audio-files 80 | S3_REGION=us-east-1 81 | S3_USE_SSL=true 82 | S3_URL_EXPIRATION=24h 83 | ``` 84 | 85 | ### Storage Options 86 | 87 | The service supports two storage modes for the converted audio: 88 | 89 | 1. **Base64 (default)**: Returns the audio file encoded in base64 format 90 | 2. **S3 Compatible Storage**: Uploads to S3-compatible storage (AWS S3, MinIO, etc.) and returns a presigned URL 91 | 92 | When S3 storage is enabled, the response will include a `url` instead of the `audio` field: 93 | 94 | ```json 95 | { 96 | "duration": 120, 97 | "format": "ogg", 98 | "url": "https://your-s3-endpoint/bucket/file.ogg?signature...", 99 | "transcription": "Transcribed text here..." // if transcription was requested 100 | } 101 | ``` 102 | 103 | If S3 upload fails, the service automatically falls back to base64 encoding. 104 | 105 | ## Running the Project 106 | 107 | ### Locally 108 | 109 | To run the service locally: 110 | 111 | ```bash 112 | go run main.go -dev 113 | ``` 114 | 115 | The server will be available at `http://localhost:4040`. 116 | 117 | ### Using Docker 118 | 119 | 1. **Build the Docker image**: 120 | 121 | ```bash 122 | docker build -t audio-service . 123 | ``` 124 | 125 | 2. 
**Run the container**: 126 | 127 | ```bash 128 | docker run -p 4040:4040 --env-file=.env audio-service 129 | ``` 130 | 131 | ## API Usage 132 | 133 | ### Authentication 134 | 135 | All requests must include the `apikey` header with your API key. 136 | 137 | ### Endpoints 138 | 139 | #### Process Audio 140 | 141 | `POST /process-audio` 142 | 143 | Accepts audio files in these formats: 144 | 145 | - Form-data 146 | - Base64 147 | - URL 148 | 149 | Optional parameters: 150 | 151 | - `format`: Output format (`mp3` or `ogg`, default: `ogg`) 152 | - `transcribe`: Enable transcription (`true` or `false`) 153 | - `language`: Transcription language code (e.g., "en", "es", "pt") 154 | 155 | #### Transcribe Only 156 | 157 | `POST /transcribe` 158 | 159 | Transcribes audio without format conversion. 160 | 161 | Optional parameters: 162 | 163 | - `language`: Transcription language code 164 | 165 | ### Example Requests 166 | 167 | #### Form-data Upload 168 | 169 | ```bash 170 | curl -X POST -F "file=@audio.mp3" \ 171 | -F "format=ogg" \ 172 | -F "transcribe=true" \ 173 | -F "language=en" \ 174 | http://localhost:4040/process-audio \ 175 | -H "apikey: your_secret_api_key_here" 176 | ``` 177 | 178 | #### Base64 Upload 179 | 180 | ```bash 181 | curl -X POST \ 182 | --data-urlencode "base64=$(base64 audio.mp3)" \ 183 | -d "format=ogg" \ 184 | http://localhost:4040/process-audio \ 185 | -H "apikey: your_secret_api_key_here" 186 | ``` 187 | 188 | #### URL Upload 189 | 190 | ```bash 191 | curl -X POST \ 192 | -d "url=https://example.com/audio.mp3" \ 193 | -d "format=ogg" \ 194 | http://localhost:4040/process-audio \ 195 | -H "apikey: your_secret_api_key_here" 196 | ``` 197 | 198 | ### Response Format 199 | 200 | With S3 storage disabled (default): 201 | 202 | ```json 203 | { 204 | "duration": 120, 205 | "audio": "UklGR... (base64 of the file)", 206 | "format": "ogg", 207 | "transcription": "Transcribed text here..." 
// if requested 208 | } 209 | ``` 210 | 211 | With S3 storage enabled: 212 | 213 | ```json 214 | { 215 | "duration": 120, 216 | "url": "https://your-s3-endpoint/bucket/file.ogg?signature...", 217 | "format": "ogg", 218 | "transcription": "Transcribed text here..." // if requested 219 | } 220 | ``` 221 | 222 | ## License 223 | 224 | This project is licensed under the [MIT](LICENSE) license. 225 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | api: 3 | container_name: evolution_audio_converter 4 | image: atendai/evolution-audio-converter:latest 5 | restart: always 6 | ports: 7 | - 4040:4040 8 | networks: 9 | - evolution-net 10 | environment: 11 | - PORT=4040 12 | - API_KEY=429683C4C977415CAAFCCE10F7D57E11 13 | expose: 14 | - 4040 15 | 16 | 17 | networks: 18 | evolution-net: 19 | name: evolution-net 20 | driver: bridge 21 | -------------------------------------------------------------------------------- /docker_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker buildx build --platform linux/amd64,linux/arm64 -t atendai/evolution-audio-converter:latest --push . 
3 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/EvolutionAPI/evolution-audio-converter 2 | 3 | go 1.22 4 | 5 | toolchain go1.22.9 6 | 7 | require ( 8 | github.com/gin-contrib/cors v1.7.2 9 | github.com/gin-gonic/gin v1.10.0 10 | github.com/joho/godotenv v1.5.1 11 | github.com/minio/minio-go/v7 v7.0.81 12 | ) 13 | 14 | require ( 15 | github.com/bytedance/sonic v1.11.6 // indirect 16 | github.com/bytedance/sonic/loader v0.1.1 // indirect 17 | github.com/cloudwego/base64x v0.1.4 // indirect 18 | github.com/cloudwego/iasm v0.2.0 // indirect 19 | github.com/dustin/go-humanize v1.0.1 // indirect 20 | github.com/gabriel-vasile/mimetype v1.4.3 // indirect 21 | github.com/gin-contrib/sse v0.1.0 // indirect 22 | github.com/go-ini/ini v1.67.0 // indirect 23 | github.com/go-playground/locales v0.14.1 // indirect 24 | github.com/go-playground/universal-translator v0.18.1 // indirect 25 | github.com/go-playground/validator/v10 v10.20.0 // indirect 26 | github.com/goccy/go-json v0.10.3 // indirect 27 | github.com/google/uuid v1.6.0 // indirect 28 | github.com/json-iterator/go v1.1.12 // indirect 29 | github.com/klauspost/compress v1.17.11 // indirect 30 | github.com/klauspost/cpuid/v2 v2.2.8 // indirect 31 | github.com/kr/text v0.2.0 // indirect 32 | github.com/leodido/go-urn v1.4.0 // indirect 33 | github.com/mattn/go-isatty v0.0.20 // indirect 34 | github.com/minio/md5-simd v1.1.2 // indirect 35 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 36 | github.com/modern-go/reflect2 v1.0.2 // indirect 37 | github.com/pelletier/go-toml/v2 v2.2.2 // indirect 38 | github.com/rs/xid v1.6.0 // indirect 39 | github.com/twitchyliquid64/golang-asm v0.15.1 // indirect 40 | github.com/ugorji/go/codec v1.2.12 // indirect 41 | golang.org/x/arch v0.8.0 // indirect 42 | golang.org/x/crypto v0.28.0 // indirect 43 | 
golang.org/x/net v0.30.0 // indirect 44 | golang.org/x/sys v0.26.0 // indirect 45 | golang.org/x/text v0.19.0 // indirect 46 | google.golang.org/protobuf v1.34.1 // indirect 47 | gopkg.in/yaml.v3 v3.0.1 // indirect 48 | ) 49 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0= 2 | github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= 3 | github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= 4 | github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= 5 | github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= 6 | github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= 7 | github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= 8 | github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= 9 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 10 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 11 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 12 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 13 | github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= 14 | github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= 15 | github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= 16 | github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= 17 | github.com/gin-contrib/cors v1.7.2 h1:oLDHxdg8W/XDoN/8zamqk/Drgt4oVZDvaV0YmvVICQw= 18 | github.com/gin-contrib/cors 
v1.7.2/go.mod h1:SUJVARKgQ40dmrzgXEVxj2m7Ig1v1qIboQkPDTQ9t2E= 19 | github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= 20 | github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= 21 | github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= 22 | github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= 23 | github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= 24 | github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= 25 | github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= 26 | github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= 27 | github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= 28 | github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= 29 | github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= 30 | github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= 31 | github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8= 32 | github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= 33 | github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= 34 | github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= 35 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= 36 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 37 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 38 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 39 | github.com/google/uuid v1.6.0/go.mod 
h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 40 | github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= 41 | github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= 42 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 43 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 44 | github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= 45 | github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= 46 | github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= 47 | github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= 48 | github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= 49 | github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= 50 | github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= 51 | github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= 52 | github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= 53 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 54 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 55 | github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= 56 | github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= 57 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 58 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 59 | github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= 60 | github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= 61 | github.com/minio/minio-go/v7 v7.0.81 
h1:SzhMN0TQ6T/xSBu6Nvw3M5M8voM+Ht8RH3hE8S7zxaA= 62 | github.com/minio/minio-go/v7 v7.0.81/go.mod h1:84gmIilaX4zcvAWWzJ5Z1WI5axN+hAbM5w25xf8xvC0= 63 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 64 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 65 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 66 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 67 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 68 | github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= 69 | github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= 70 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 71 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 72 | github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8= 73 | github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= 74 | github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= 75 | github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= 76 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 77 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 78 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 79 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= 80 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 81 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 82 
| github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 83 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 84 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 85 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 86 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 87 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 88 | github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= 89 | github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= 90 | github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= 91 | github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= 92 | golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= 93 | golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= 94 | golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= 95 | golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= 96 | golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= 97 | golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= 98 | golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= 99 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 100 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 101 | golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= 102 | golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 103 | golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= 104 | golang.org/x/text 
v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= 105 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 106 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 107 | google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= 108 | google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= 109 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 110 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 111 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 112 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 113 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 114 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 115 | nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= 116 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 117 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/base64" 7 | "encoding/json" 8 | "errors" 9 | "flag" 10 | "fmt" 11 | "io" 12 | "mime/multipart" 13 | "net/http" 14 | "os" 15 | "os/exec" 16 | "regexp" 17 | "strconv" 18 | "strings" 19 | "sync" 20 | 21 | "time" 22 | 23 | "github.com/gin-contrib/cors" 24 | "github.com/gin-gonic/gin" 25 | "github.com/joho/godotenv" 26 | "github.com/minio/minio-go/v7" 27 | "github.com/minio/minio-go/v7/pkg/credentials" 28 | ) 29 | 30 | var ( 31 | apiKey string 32 | httpClient = &http.Client{} 33 | 
bufferPool = sync.Pool{ 34 | New: func() interface{} { 35 | return new(bytes.Buffer) 36 | }, 37 | } 38 | allowedOrigins []string 39 | enableTranscription bool 40 | transcriptionProvider string 41 | openaiAPIKey string 42 | groqAPIKey string 43 | defaultTranscriptionLanguage string 44 | enableS3Storage bool 45 | s3Endpoint string 46 | s3AccessKey string 47 | s3SecretKey string 48 | s3BucketName string 49 | s3Region string 50 | s3UseSSL bool 51 | s3Client *minio.Client 52 | s3URLExpiration time.Duration 53 | ) 54 | 55 | func init() { 56 | devMode := flag.Bool("dev", false, "Run in development mode") 57 | flag.Parse() 58 | 59 | if *devMode { 60 | err := godotenv.Load() 61 | if err != nil { 62 | fmt.Println("Error loading .env file") 63 | } else { 64 | fmt.Println(".env file loaded successfully") 65 | } 66 | } 67 | 68 | apiKey = os.Getenv("API_KEY") 69 | if apiKey == "" { 70 | fmt.Println("API_KEY not configured in .env file") 71 | } 72 | 73 | allowOriginsEnv := os.Getenv("CORS_ALLOW_ORIGINS") 74 | if allowOriginsEnv != "" { 75 | allowedOrigins = strings.Split(allowOriginsEnv, ",") 76 | fmt.Printf("Allowed origins: %v\n", allowedOrigins) 77 | } else { 78 | allowedOrigins = []string{"*"} 79 | fmt.Println("No specific origins configured, allowing all (*)") 80 | } 81 | 82 | enableTranscription = os.Getenv("ENABLE_TRANSCRIPTION") == "true" 83 | transcriptionProvider = os.Getenv("TRANSCRIPTION_PROVIDER") 84 | openaiAPIKey = os.Getenv("OPENAI_API_KEY") 85 | groqAPIKey = os.Getenv("GROQ_API_KEY") 86 | defaultTranscriptionLanguage = os.Getenv("TRANSCRIPTION_LANGUAGE") 87 | 88 | // Configuração do S3 89 | enableS3Storage = os.Getenv("ENABLE_S3_STORAGE") == "true" 90 | if enableS3Storage { 91 | s3Endpoint = os.Getenv("S3_ENDPOINT") 92 | s3AccessKey = os.Getenv("S3_ACCESS_KEY") 93 | s3SecretKey = os.Getenv("S3_SECRET_KEY") 94 | s3BucketName = os.Getenv("S3_BUCKET_NAME") 95 | s3Region = os.Getenv("S3_REGION") 96 | s3UseSSL = os.Getenv("S3_USE_SSL") == "true" 97 | 98 | // Parse URL 
expiration duration, default to 24 hours 99 | expiration := os.Getenv("S3_URL_EXPIRATION") 100 | if expiration == "" { 101 | expiration = "24h" 102 | } 103 | var err error 104 | s3URLExpiration, err = time.ParseDuration(expiration) 105 | if err != nil { 106 | fmt.Printf("Invalid S3_URL_EXPIRATION format, using default 24h: %v\n", err) 107 | s3URLExpiration = 24 * time.Hour 108 | } 109 | 110 | // Initialize MinIO client 111 | minioClient, err := minio.New(s3Endpoint, &minio.Options{ 112 | Creds: credentials.NewStaticV4(s3AccessKey, s3SecretKey, ""), 113 | Secure: s3UseSSL, 114 | Region: s3Region, 115 | }) 116 | if err != nil { 117 | fmt.Printf("Error initializing S3 client: %v\n", err) 118 | return 119 | } 120 | s3Client = minioClient 121 | 122 | // Create bucket if it doesn't exist 123 | exists, err := s3Client.BucketExists(context.Background(), s3BucketName) 124 | if err != nil { 125 | fmt.Printf("Error checking bucket existence: %v\n", err) 126 | return 127 | } 128 | 129 | if !exists { 130 | err = s3Client.MakeBucket(context.Background(), s3BucketName, minio.MakeBucketOptions{Region: s3Region}) 131 | if err != nil { 132 | fmt.Printf("Error creating bucket: %v\n", err) 133 | return 134 | } 135 | fmt.Printf("Created bucket: %s\n", s3BucketName) 136 | } 137 | } 138 | } 139 | 140 | func validateAPIKey(c *gin.Context) bool { 141 | if apiKey == "" { 142 | c.JSON(http.StatusInternalServerError, gin.H{"error": "Internal server error"}) 143 | return false 144 | } 145 | 146 | requestApiKey := c.GetHeader("apikey") 147 | if requestApiKey == "" { 148 | c.JSON(http.StatusUnauthorized, gin.H{"error": "API_KEY not provided"}) 149 | return false 150 | } 151 | 152 | if requestApiKey != apiKey { 153 | c.JSON(http.StatusUnauthorized, gin.H{"error": "Invalid API_KEY"}) 154 | return false 155 | } 156 | 157 | return true 158 | } 159 | 160 | func convertAudio(inputData []byte, format string) ([]byte, int, error) { 161 | var cmd *exec.Cmd 162 | 163 | switch format { 164 | case "mp4": 
165 | // Special handling for MP4 166 | cmd = exec.Command("ffmpeg", "-i", "pipe:0", 167 | "-vn", 168 | "-c:a", 169 | "aac", 170 | "-b:a", "128k", 171 | "-f", "adts", 172 | "pipe:1", 173 | ) 174 | default: 175 | // Any other audio format (e.g., .oga, .ogg, .mp3, .mp4, .m4a, .wav, etc.) 176 | cmd = exec.Command("ffmpeg", "-i", "pipe:0", 177 | "-f", 178 | "ogg", 179 | "-vn", 180 | "-c:a", 181 | "libopus", 182 | "-avoid_negative_ts", 183 | "make_zero", 184 | "-b:a", 185 | "128k", 186 | "-ar", 187 | "48000", 188 | "-ac", 189 | "1", 190 | "-write_xing", 191 | "0", 192 | "-compression_level", 193 | "10", 194 | "-application", 195 | "voip", 196 | "-fflags", 197 | "+bitexact", 198 | "-flags", 199 | "+bitexact", 200 | "-id3v2_version", 201 | "0", 202 | "-map_metadata", 203 | "-1", 204 | "-map_chapters", 205 | "-1", 206 | "-write_bext", 207 | "0", 208 | "pipe:1", 209 | ) 210 | } 211 | outBuffer := bufferPool.Get().(*bytes.Buffer) 212 | errBuffer := bufferPool.Get().(*bytes.Buffer) 213 | defer bufferPool.Put(outBuffer) 214 | defer bufferPool.Put(errBuffer) 215 | 216 | outBuffer.Reset() 217 | errBuffer.Reset() 218 | 219 | cmd.Stdin = bytes.NewReader(inputData) 220 | cmd.Stdout = outBuffer 221 | cmd.Stderr = errBuffer 222 | 223 | err := cmd.Run() 224 | if err != nil { 225 | return nil, 0, fmt.Errorf("error during conversion: %v, details: %s", err, errBuffer.String()) 226 | } 227 | 228 | convertedData := make([]byte, outBuffer.Len()) 229 | copy(convertedData, outBuffer.Bytes()) 230 | 231 | // Parsing da duração 232 | outputText := errBuffer.String() 233 | splitTime := strings.Split(outputText, "time=") 234 | 235 | if len(splitTime) < 2 { 236 | return nil, 0, errors.New("duração não encontrada") 237 | } 238 | 239 | re := regexp.MustCompile(`(\d+):(\d+):(\d+\.\d+)`) 240 | matches := re.FindStringSubmatch(splitTime[2]) 241 | if len(matches) != 4 { 242 | return nil, 0, errors.New("formato de duração não encontrado") 243 | } 244 | 245 | hours, _ := strconv.ParseFloat(matches[1], 64) 
246 | minutes, _ := strconv.ParseFloat(matches[2], 64) 247 | seconds, _ := strconv.ParseFloat(matches[3], 64) 248 | duration := int(hours*3600 + minutes*60 + seconds) 249 | 250 | return convertedData, duration, nil 251 | } 252 | 253 | func fetchAudioFromURL(url string) ([]byte, error) { 254 | resp, err := httpClient.Get(url) 255 | if err != nil { 256 | return nil, err 257 | } 258 | defer resp.Body.Close() 259 | 260 | return io.ReadAll(resp.Body) 261 | } 262 | 263 | func getInputData(c *gin.Context) ([]byte, error) { 264 | if file, _, err := c.Request.FormFile("file"); err == nil { 265 | return io.ReadAll(file) 266 | } 267 | 268 | if base64Data := c.PostForm("base64"); base64Data != "" { 269 | return base64.StdEncoding.DecodeString(base64Data) 270 | } 271 | 272 | if url := c.PostForm("url"); url != "" { 273 | return fetchAudioFromURL(url) 274 | } 275 | 276 | return nil, errors.New("no file, base64 or URL provided") 277 | } 278 | 279 | func transcribeAudio(audioData []byte, language string) (string, error) { 280 | if !enableTranscription { 281 | return "", errors.New("transcription is not enabled") 282 | } 283 | 284 | // Se nenhum idioma foi especificado, use o padrão do .env 285 | if language == "" { 286 | language = defaultTranscriptionLanguage 287 | } 288 | 289 | switch transcriptionProvider { 290 | case "openai": 291 | return transcribeWithOpenAI(audioData, language) 292 | case "groq": 293 | return transcribeWithGroq(audioData, language) 294 | default: 295 | return "", errors.New("invalid transcription provider") 296 | } 297 | } 298 | 299 | func transcribeWithOpenAI(audioData []byte, language string) (string, error) { 300 | if openaiAPIKey == "" { 301 | return "", errors.New("OpenAI API key not configured") 302 | } 303 | 304 | // Se nenhum idioma foi especificado, use o padrão 305 | if language == "" { 306 | language = defaultTranscriptionLanguage 307 | } 308 | 309 | // Salvar temporariamente o arquivo 310 | tempFile, err := os.CreateTemp("", "audio-*.ogg") 311 | 
if err != nil { 312 | return "", err 313 | } 314 | defer os.Remove(tempFile.Name()) 315 | 316 | if _, err := tempFile.Write(audioData); err != nil { 317 | return "", err 318 | } 319 | tempFile.Close() 320 | 321 | url := "https://api.openai.com/v1/audio/transcriptions" 322 | body := &bytes.Buffer{} 323 | writer := multipart.NewWriter(body) 324 | 325 | // Adicionar o arquivo 326 | file, err := os.Open(tempFile.Name()) 327 | if err != nil { 328 | return "", err 329 | } 330 | defer file.Close() 331 | 332 | part, err := writer.CreateFormFile("file", "audio.ogg") 333 | if err != nil { 334 | return "", err 335 | } 336 | io.Copy(part, file) 337 | 338 | // Adicionar modelo e idioma 339 | writer.WriteField("model", "whisper-1") 340 | if language != "" { 341 | writer.WriteField("language", language) 342 | } 343 | 344 | writer.Close() 345 | 346 | req, err := http.NewRequest("POST", url, body) 347 | if err != nil { 348 | return "", err 349 | } 350 | 351 | req.Header.Set("Authorization", "Bearer "+openaiAPIKey) 352 | req.Header.Set("Content-Type", writer.FormDataContentType()) 353 | 354 | resp, err := http.DefaultClient.Do(req) 355 | if err != nil { 356 | return "", err 357 | } 358 | defer resp.Body.Close() 359 | 360 | if resp.StatusCode != http.StatusOK { 361 | bodyBytes, _ := io.ReadAll(resp.Body) 362 | return "", fmt.Errorf("erro na API OpenAI (status %d): %s", resp.StatusCode, string(bodyBytes)) 363 | } 364 | 365 | var result struct { 366 | Text string `json:"text"` 367 | } 368 | if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { 369 | return "", err 370 | } 371 | 372 | return result.Text, nil 373 | } 374 | 375 | func transcribeWithGroq(audioData []byte, language string) (string, error) { 376 | if groqAPIKey == "" { 377 | return "", errors.New("Groq API key not configured") 378 | } 379 | 380 | // Se nenhum idioma foi especificado, use o padrão 381 | if language == "" { 382 | language = defaultTranscriptionLanguage 383 | } 384 | 385 | // Salvar temporariamente 
o arquivo 386 | tempFile, err := os.CreateTemp("", "audio-*.ogg") 387 | if err != nil { 388 | return "", err 389 | } 390 | defer os.Remove(tempFile.Name()) 391 | 392 | if _, err := tempFile.Write(audioData); err != nil { 393 | return "", err 394 | } 395 | tempFile.Close() 396 | 397 | url := "https://api.groq.com/openai/v1/audio/transcriptions" 398 | body := &bytes.Buffer{} 399 | writer := multipart.NewWriter(body) 400 | 401 | // Adicionar o arquivo 402 | file, err := os.Open(tempFile.Name()) 403 | if err != nil { 404 | return "", err 405 | } 406 | defer file.Close() 407 | 408 | part, err := writer.CreateFormFile("file", "audio.ogg") 409 | if err != nil { 410 | return "", err 411 | } 412 | io.Copy(part, file) 413 | 414 | // Adicionar modelo e configurações 415 | writer.WriteField("model", "whisper-large-v3-turbo") // modelo mais rápido e com bom custo-benefício 416 | if language != "" { 417 | writer.WriteField("language", language) 418 | } 419 | writer.WriteField("response_format", "json") 420 | writer.WriteField("temperature", "0.0") // mais preciso 421 | 422 | writer.Close() 423 | 424 | req, err := http.NewRequest("POST", url, body) 425 | if err != nil { 426 | return "", err 427 | } 428 | 429 | req.Header.Set("Authorization", "Bearer "+groqAPIKey) 430 | req.Header.Set("Content-Type", writer.FormDataContentType()) 431 | 432 | resp, err := http.DefaultClient.Do(req) 433 | if err != nil { 434 | return "", err 435 | } 436 | defer resp.Body.Close() 437 | 438 | if resp.StatusCode != http.StatusOK { 439 | bodyBytes, _ := io.ReadAll(resp.Body) 440 | return "", fmt.Errorf("erro na API Groq (status %d): %s", resp.StatusCode, string(bodyBytes)) 441 | } 442 | 443 | var result struct { 444 | Text string `json:"text"` 445 | } 446 | if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { 447 | return "", err 448 | } 449 | 450 | return result.Text, nil 451 | } 452 | 453 | func uploadToS3(data []byte, format string) (string, error) { 454 | if !enableS3Storage || 
s3Client == nil { 455 | return "", errors.New("S3 storage is not enabled or properly configured") 456 | } 457 | 458 | // Generate unique filename 459 | filename := fmt.Sprintf("%d.%s", time.Now().UnixNano(), format) 460 | contentType := fmt.Sprintf("audio/%s", format) 461 | 462 | // Upload to S3 463 | _, err := s3Client.PutObject( 464 | context.Background(), 465 | s3BucketName, 466 | filename, 467 | bytes.NewReader(data), 468 | int64(len(data)), 469 | minio.PutObjectOptions{ContentType: contentType}, 470 | ) 471 | if err != nil { 472 | return "", fmt.Errorf("error uploading to S3: %v", err) 473 | } 474 | 475 | // Generate presigned URL 476 | url, err := s3Client.PresignedGetObject( 477 | context.Background(), 478 | s3BucketName, 479 | filename, 480 | s3URLExpiration, 481 | nil, 482 | ) 483 | if err != nil { 484 | return "", fmt.Errorf("error generating presigned URL: %v", err) 485 | } 486 | 487 | return url.String(), nil 488 | } 489 | 490 | func processAudio(c *gin.Context) { 491 | if !validateAPIKey(c) { 492 | return 493 | } 494 | 495 | inputData, err := getInputData(c) 496 | if err != nil { 497 | c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) 498 | return 499 | } 500 | 501 | format := c.DefaultPostForm("format", "ogg") 502 | 503 | convertedData, duration, err := convertAudio(inputData, format) 504 | if err != nil { 505 | c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) 506 | return 507 | } 508 | 509 | response := gin.H{ 510 | "duration": duration, 511 | "format": format, 512 | } 513 | 514 | // Handle S3 upload if enabled 515 | if enableS3Storage { 516 | url, err := uploadToS3(convertedData, format) 517 | if err != nil { 518 | fmt.Printf("Error uploading to S3: %v\n", err) 519 | // Fallback to base64 if S3 upload fails 520 | response["audio"] = base64.StdEncoding.EncodeToString(convertedData) 521 | } else { 522 | response["url"] = url 523 | } 524 | } else { 525 | response["audio"] = base64.StdEncoding.EncodeToString(convertedData) 
526 | } 527 | 528 | // Handle transcription if requested 529 | if c.DefaultPostForm("transcribe", "false") == "true" { 530 | language := c.DefaultPostForm("language", "") 531 | transcription, err := transcribeAudio(convertedData, language) 532 | if err != nil { 533 | fmt.Printf("Error in transcription: %v\n", err) 534 | } else { 535 | response["transcription"] = transcription 536 | } 537 | } 538 | 539 | c.JSON(http.StatusOK, response) 540 | } 541 | 542 | func validateOrigin(origin string) bool { 543 | fmt.Printf("Validating origin: %s\n", origin) 544 | fmt.Printf("Allowed origins: %v\n", allowedOrigins) 545 | 546 | if len(allowedOrigins) == 0 { 547 | return true 548 | } 549 | 550 | if origin == "" { 551 | return true 552 | } 553 | 554 | for _, allowed := range allowedOrigins { 555 | allowed = strings.TrimSpace(allowed) 556 | 557 | if allowed == "*" { 558 | return true 559 | } 560 | 561 | if allowed == origin { 562 | fmt.Printf("Origin %s matches %s\n", origin, allowed) 563 | return true 564 | } 565 | } 566 | 567 | fmt.Printf("Origin %s not found in allowed origins\n", origin) 568 | return false 569 | } 570 | 571 | func originMiddleware() gin.HandlerFunc { 572 | return func(c *gin.Context) { 573 | origin := c.Request.Header.Get("Origin") 574 | fmt.Printf("\n=== CORS Debug ===\n") 575 | fmt.Printf("Received origin: %s\n", origin) 576 | fmt.Printf("Complete headers: %+v\n", c.Request.Header) 577 | fmt.Printf("Allowed origins: %v\n", allowedOrigins) 578 | fmt.Printf("=================\n") 579 | 580 | if origin == "" { 581 | origin = c.Request.Header.Get("Referer") 582 | fmt.Printf("Empty origin, using Referer: %s\n", origin) 583 | } 584 | 585 | if !validateOrigin(origin) { 586 | fmt.Printf("❌ Origin rejected: %s\n", origin) 587 | c.JSON(http.StatusForbidden, gin.H{"error": "Origin not allowed"}) 588 | c.Abort() 589 | return 590 | } 591 | 592 | fmt.Printf("✅ Origin accepted: %s\n", origin) 593 | c.Next() 594 | } 595 | } 596 | 597 | func transcribeOnly(c *gin.Context) { 
598 | if !validateAPIKey(c) { 599 | return 600 | } 601 | 602 | inputData, err := getInputData(c) 603 | if err != nil { 604 | c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) 605 | return 606 | } 607 | 608 | // Converter para ogg primeiro 609 | convertedData, _, err := convertAudio(inputData, "ogg") 610 | if err != nil { 611 | c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) 612 | return 613 | } 614 | 615 | // Pega o idioma da requisição ou usa vazio para usar o padrão do .env 616 | language := c.DefaultPostForm("language", "") 617 | transcription, err := transcribeAudio(convertedData, language) 618 | if err != nil { 619 | c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) 620 | return 621 | } 622 | 623 | c.JSON(http.StatusOK, gin.H{ 624 | "transcription": transcription, 625 | }) 626 | } 627 | 628 | func main() { 629 | port := os.Getenv("PORT") 630 | if port == "" { 631 | port = "8080" 632 | } 633 | 634 | router := gin.Default() 635 | 636 | config := cors.DefaultConfig() 637 | config.AllowOrigins = allowedOrigins 638 | config.AllowMethods = []string{"POST", "GET", "OPTIONS"} 639 | config.AllowHeaders = []string{"Origin", "Content-Type", "Accept", "Authorization", "apikey"} 640 | config.AllowCredentials = true 641 | 642 | router.Use(cors.New(config)) 643 | router.Use(originMiddleware()) 644 | 645 | router.POST("/process-audio", processAudio) 646 | router.POST("/transcribe", transcribeOnly) 647 | 648 | router.Run(":" + port) 649 | } --------------------------------------------------------------------------------