├── SAS.jpg ├── logo.jpg ├── sample.wav ├── webapp1.jpg ├── webapp2.jpg ├── webapp3.jpg ├── webapp4.jpg ├── document.gif ├── Translation.png ├── logotranslate.png ├── sttranslatelogo.jpg ├── source └── worddocument.docx ├── translated ├── worddocument_translated_ar.docx ├── worddocument_translated_fr.docx ├── worddocument_translated_it.docx └── worddocument_translated_zh-Hans.docx ├── azure.env ├── README.md ├── 3 Transliterate example.ipynb ├── 2 Language detection.ipynb ├── 5 Document translation batch.ipynb ├── 6 Synchronous Document Translation.ipynb ├── 4 Translation.ipynb └── 1 Azure AI Translator informations.ipynb /SAS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/SAS.jpg -------------------------------------------------------------------------------- /logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/logo.jpg -------------------------------------------------------------------------------- /sample.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/sample.wav -------------------------------------------------------------------------------- /webapp1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/webapp1.jpg -------------------------------------------------------------------------------- /webapp2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/webapp2.jpg -------------------------------------------------------------------------------- /webapp3.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/webapp3.jpg -------------------------------------------------------------------------------- /webapp4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/webapp4.jpg -------------------------------------------------------------------------------- /document.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/document.gif -------------------------------------------------------------------------------- /Translation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/Translation.png -------------------------------------------------------------------------------- /logotranslate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/logotranslate.png -------------------------------------------------------------------------------- /sttranslatelogo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/sttranslatelogo.jpg -------------------------------------------------------------------------------- /source/worddocument.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/source/worddocument.docx -------------------------------------------------------------------------------- /translated/worddocument_translated_ar.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/translated/worddocument_translated_ar.docx -------------------------------------------------------------------------------- /translated/worddocument_translated_fr.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/translated/worddocument_translated_fr.docx -------------------------------------------------------------------------------- /translated/worddocument_translated_it.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/translated/worddocument_translated_it.docx -------------------------------------------------------------------------------- /translated/worddocument_translated_zh-Hans.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/retkowsky/azure-ai-translator/HEAD/translated/worddocument_translated_zh-Hans.docx -------------------------------------------------------------------------------- /azure.env: -------------------------------------------------------------------------------- 1 | # Azure AI Translation service 2 | AZURE_AI_TRANSLATION_ENDPOINTDOCUMENT = "tobecompleted" 3 | AZURE_AI_TRANSLATION_ENDPOINT = "tobecompleted" 4 | AZURE_AI_TRANSLATION_KEY = "tobecompleted" 5 | AZURE_AI_TRANSLATION_REGION = "tobecompleted" 6 | 7 | # Storage accounts 8 | sourceblobsasurl = "tobecompleted" 9 | sourceblobsastoken = "tobecompleted" 10 | 11 | # Target blob SAS 12 | targetblobsasurl = "tobecompleted" 13 | targetblobsastoken = "tobecompleted" 14 | 15 | # Output Blob Storage connection string 16 | connection_string = "tobecompleted" 17 | 18 | # Azure OpenAI 19 | AOAI_ENDPOINT = "tobecompleted" 20 | AOAI_KEY = "tobecompleted" 21 | AOAI_VERSION = "2024-02-01" 22 | AOAI_TYPE = "azure" 23 | AOAI_DEPLOYMENT = "tobecompleted" # 
Your Whisper model deployed name 24 | AOAI_MODEL = "tobecompleted" 25 | 26 | # Azure Speech Services 27 | azure_speech_key = "tobecompleted" 28 | azure_speech_region = "tobecompleted" 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Azure AI Translator: Demos & Professional Notebooks 2 | 3 | Azure AI Translator Logo 4 | 5 | **Azure AI Translator** is a robust, cloud-based neural machine translation service providing fast, accurate text and document translations in 135+ languages. Easily integrate real-time or batch translation directly into your applications using simple REST APIs. Take translation further with advanced features like custom translation models and containerized deployments for full control. 6 | 7 | --- 8 | 9 | ## 🚀 Key Features 10 | 11 | - **Fast, Accurate Translation:** Leverages state-of-the-art neural machine translation for superior quality. 12 | - **Text and Document Translation:** Translating both structured and unstructured data, synchronously or in batch/asynchronous modes. 13 | - **Custom Translator:** Tailor models using your translation memory and domain-specific data for optimal results. 14 | - **Automated Language Detection & Transliteration:** Detect languages and convert between scripts automatically. 15 | - **Vocal & Speech Integration:** Combine with Azure Speech Services for end-to-end voice translation pipelines. 16 | - **Containers Support:** Deploy locally or at the edge for regulatory or latency requirements. 17 | 18 | Explore the official documentation for the full feature set: 19 | https://azure.microsoft.com/en-us/products/ai-services/ai-translator 20 | 21 | --- 22 | 23 | ## 📓 Python Demo Notebooks 24 | 25 | Learn and prototype using a comprehensive set of Jupyter notebooks: 26 | 27 | 1. 
**Service Overview:** 28 | [Azure AI Translator informations.ipynb](1%20Azure%20AI%20Translator%20informations.ipynb) 29 | 2. **Language Detection:** 30 | [Language detection.ipynb](2%20Language%20detection.ipynb) 31 | 3. **Transliteration Examples:** 32 | [Transliterate example.ipynb](3%20Transliterate%20example.ipynb) 33 | 4. **Text Translation:** 34 | [Translation.ipynb](4%20Translation.ipynb) 35 | 5. **Batch Document Translation:** 36 | [Document translation batch.ipynb](5%20Document%20translation%20batch.ipynb) 37 | 6. **Synchronous Document Translation:** 38 | [Synchronous Document Translation.ipynb](6%20Synchronous%20Document%20Translation.ipynb) 39 | 7. **Vocal Translator Using Azure AI:** 40 | [Vocal Translator using Azure AI.ipynb](7%20Vocal%20Translator%20using%20Azure%20AI.ipynb) 41 | 8. **Voice Translation with Azure Speech Services:** 42 | [Vocal Translator using Azure AI STT.ipynb](8%20Vocal%20Translator%20using%20Azure%20AI%20STT.ipynb) 43 | 44 | > **Note:** 45 | > Please update the `azure.env` file with your Azure AI Services credentials to run the demos. 46 | 47 | --- 48 | 49 | ## 🖥️ Web Application Examples 50 | 51 | - **Text Translation Webapp:** 52 | Text Translation Webapp Screenshot 53 | 54 | - **Document Translation Webapp:** 55 | Document Translation Webapp Screenshot 56 | 57 | - **Vocal Translation Webapp:** 58 | Vocal Translation Webapp Screenshot 59 | 60 | --- 61 | 62 | ## 🔥 Azure AI Translator Highlights 63 | 64 | ### Text Translation 65 | Translate text between over 135 languages in real time. Supports custom dictionaries and translation exclusions. 66 | - Docs: https://learn.microsoft.com/en-us/azure/ai-services/translator/text-translation-overview 67 | 68 | ### Asynchronous Batch Document Translation 69 | Automate translations for large-scale and complex documents while preserving original structure and formatting. Glossary support included. 
70 | - Docs: https://learn.microsoft.com/en-us/azure/ai-services/translator/document-translation/overview 71 | 72 | ### Synchronous Document Translation 73 | Quickly translate single documents (no blob storage required). Optional glossary support. 74 | - Docs: https://learn.microsoft.com/en-us/azure/ai-services/translator/document-translation/reference/synchronous-rest-api-guide 75 | 76 | ### Custom Translator 77 | Train and deploy translation models customized to your domain, terminology, and style. 78 | - Docs: https://learn.microsoft.com/en-us/azure/ai-services/translator/custom-translator/overview 79 | 80 | --- 81 | 82 | ## 📚 Additional Documentation & Resources 83 | 84 | - [Azure AI Translator documentation](https://learn.microsoft.com/en-us/azure/ai-services/translator/) 85 | - [Language Support List](https://learn.microsoft.com/en-us/azure/ai-services/translator/language-support) 86 | - [Service Pricing](https://azure.microsoft.com/en-us/pricing/details/cognitive-services/translator/) 87 | - [What’s New](https://learn.microsoft.com/en-us/azure/ai-services/translator/whats-new?tabs=csharp) 88 | - [Official Windows Application](https://github.com/MicrosoftTranslator/DocumentTranslation/releases) 89 | - [MicrosoftTranslator Repositories](https://github.com/orgs/MicrosoftTranslator/repositories) 90 | - [Translator FAQ](https://learn.microsoft.com/en-us/azure/ai-services/translator/translator-faq) 91 | 92 | --- 93 | 94 | ## 📝 Getting Started 95 | 96 | 1. Clone this repository. 97 | 2. Update the `azure.env` file with your Azure subscription and Translator credentials. 98 | 3. Open and explore the notebooks in your preferred Python & Jupyter environment. 
99 | 100 | --- 101 | 102 | ## 📅 Project Info 103 | 104 | - **Created:** 10-April-2024 105 | - **Updated:** 20-November-2025 106 | - **Author:** Serge Retkowsky 107 | - [LinkedIn](https://www.linkedin.com/in/serger/) | serge.retkowsky@microsoft.com -------------------------------------------------------------------------------- /3 Transliterate example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ce125097", 6 | "metadata": {}, 7 | "source": [ 8 | "# Transliterate\n", 9 | "https://learn.microsoft.com/en-us/azure/ai-services/translator/reference/v3-0-transliterate\n", 10 | "\n", 11 | "" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "id": "632fc324", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import datetime\n", 22 | "import json\n", 23 | "import os\n", 24 | "import requests\n", 25 | "import sys\n", 26 | "import uuid\n", 27 | "\n", 28 | "from dotenv import load_dotenv" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "id": "af0461df", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "'3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]'" 41 | ] 42 | }, 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "sys.version" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "id": "7323c7bb", 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "Today is 05-Mar-2025 11:12:23\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "print(f\"Today is {datetime.datetime.today().strftime('%d-%b-%Y %H:%M:%S')}\")" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "3c7c3e4c", 73 | "metadata": {}, 74 | "source": [ 75 | "## Azure AI Translator credentials" 76 | ] 77 | }, 78 | { 79 | 
"cell_type": "code", 80 | "execution_count": 4, 81 | "id": "5eef198c", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "load_dotenv(\"azure.env\")\n", 86 | "\n", 87 | "azure_ai_translator_key = os.getenv(\"AZURE_AI_TRANSLATION_KEY\")\n", 88 | "azure_ai_translator_endpoint = os.getenv(\"AZURE_AI_TRANSLATION_ENDPOINT\")\n", 89 | "azure_ai_translator_region = os.getenv(\"AZURE_AI_TRANSLATION_REGION\")" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "id": "e2acc88e", 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "path = \"/transliterate?api-version=3.0\"\n", 100 | "params = \"&language=ja&fromScript=jpan&toScript=latn\"\n", 101 | "\n", 102 | "constructed_url = azure_ai_translator_endpoint + path + params\n", 103 | "\n", 104 | "headers = {\n", 105 | " \"Ocp-Apim-Subscription-Key\": azure_ai_translator_key,\n", 106 | " \"Ocp-Apim-Subscription-Region\": azure_ai_translator_region,\n", 107 | " \"Content-type\": \"application/json\",\n", 108 | " \"X-ClientTraceId\": str(uuid.uuid4()),\n", 109 | "}" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "id": "3371644f", 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "# Transliterate \"good afternoon\" from source Japanese.\n", 120 | "# Note: You can pass more than one object in body.\n", 121 | "body = [{\"text\": \"こんにちは\"}]" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "id": "3d332428", 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "[{'text': 'こんにちは'}]" 134 | ] 135 | }, 136 | "execution_count": 7, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "body" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 8, 148 | "id": "d6e309a5", 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 
155 | "\u001b[1;31;34m\n", 156 | "[\n", 157 | " {\n", 158 | " \"script\": \"Latn\",\n", 159 | " \"text\": \"Kon'nichiwa​\"\n", 160 | " }\n", 161 | "]\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "request = requests.post(constructed_url, headers=headers, json=body)\n", 167 | "response = request.json()\n", 168 | "\n", 169 | "print(\"\\033[1;31;34m\")\n", 170 | "print(json.dumps(response, sort_keys=True, indent=4,\n", 171 | " ensure_ascii=False, separators=(',', ': ')))" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 9, 177 | "id": "be626e75", 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "name": "stdout", 182 | "output_type": "stream", 183 | "text": [ 184 | "Result: Kon'nichiwa​ with script = Latn\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "print(f\"Result: {response[0]['text']} with script = {response[0]['script']}\")" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "id": "6fc820de", 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3.10 - SDK v2", 204 | "language": "python", 205 | "name": "python310-sdkv2" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.10.14" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 5 222 | } 223 | -------------------------------------------------------------------------------- /2 Language detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "bef80185", 6 | "metadata": {}, 7 | "source": [ 8 | "# Language detection\n", 9 | 
"https://learn.microsoft.com/en-us/azure/ai-services/translator/reference/v3-0-detect\n", 10 | "\n", 11 | "" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "id": "06adda2b", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import datetime\n", 22 | "import json\n", 23 | "import os\n", 24 | "import requests\n", 25 | "import sys\n", 26 | "import uuid\n", 27 | "\n", 28 | "from dotenv import load_dotenv" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "id": "d8a093ee", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "'3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]'" 41 | ] 42 | }, 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "sys.version" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "id": "1b76cc36", 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "Today is 05-Mar-2025 11:12:04\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "print(f\"Today is {datetime.datetime.today().strftime('%d-%b-%Y %H:%M:%S')}\")" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "ad5c0c6b", 73 | "metadata": {}, 74 | "source": [ 75 | "## Azure AI Translator credentials" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 4, 81 | "id": "18ecb048", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "load_dotenv(\"azure.env\")\n", 86 | "\n", 87 | "azure_ai_translator_key = os.getenv(\"AZURE_AI_TRANSLATION_KEY\")\n", 88 | "azure_ai_translator_endpoint = os.getenv(\"AZURE_AI_TRANSLATION_ENDPOINT\")\n", 89 | "azure_ai_translator_region = os.getenv(\"AZURE_AI_TRANSLATION_REGION\")" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "id": "a3b0020d", 95 | "metadata": {}, 96 | "source": [ 97 | "## Examples" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 
102 | "execution_count": 5, 103 | "id": "07a38837", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "# Note: You can pass more than one object in body.\n", 108 | "body = [{\"text\": \"Bonjour, bienvenue à cette présentation Azure !\"}]" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "id": "1fc09d78", 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "\u001b[1;31;34m\n", 122 | "[\n", 123 | " {\n", 124 | " \"isTranslationSupported\": true,\n", 125 | " \"isTransliterationSupported\": false,\n", 126 | " \"language\": \"fr\",\n", 127 | " \"score\": 1.0\n", 128 | " }\n", 129 | "]\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "path = \"/detect?api-version=3.0\"\n", 135 | "constructed_url = azure_ai_translator_endpoint + path\n", 136 | "\n", 137 | "headers = {\n", 138 | " \"Ocp-Apim-Subscription-Key\": azure_ai_translator_key,\n", 139 | " \"Ocp-Apim-Subscription-Region\": azure_ai_translator_region,\n", 140 | " \"Content-type\": \"application/json\",\n", 141 | " \"X-ClientTraceId\": str(uuid.uuid4()),\n", 142 | "}\n", 143 | "\n", 144 | "\n", 145 | "request = requests.post(constructed_url, headers=headers, json=body)\n", 146 | "response = request.json()\n", 147 | "\n", 148 | "print(\"\\033[1;31;34m\")\n", 149 | "print(\n", 150 | " json.dumps(\n", 151 | " response, sort_keys=True, indent=4, ensure_ascii=False, separators=(\",\", \": \")\n", 152 | " )\n", 153 | ")" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 7, 159 | "id": "eddc9a2c", 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "Detected language: fr with confidence = 1.0\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "print(f\"Detected language: {response[0]['language']} with confidence = {response[0]['score']}\")" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 
| "execution_count": 8, 177 | "id": "a67b80c1", 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "body = [{\"text\": \"こんにちは\"}]" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 9, 187 | "id": "88d460b9", 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "\u001b[1;31;34m\n", 195 | "[\n", 196 | " {\n", 197 | " \"isTranslationSupported\": true,\n", 198 | " \"isTransliterationSupported\": true,\n", 199 | " \"language\": \"ja\",\n", 200 | " \"score\": 1.0\n", 201 | " }\n", 202 | "]\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "request = requests.post(constructed_url, headers=headers, json=body)\n", 208 | "response = request.json()\n", 209 | "\n", 210 | "print(\"\\033[1;31;34m\")\n", 211 | "print(\n", 212 | " json.dumps(\n", 213 | " response, sort_keys=True, indent=4, ensure_ascii=False, separators=(\",\", \": \")\n", 214 | " )\n", 215 | ")" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 10, 221 | "id": "cd71bf02", 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "Detected language: ja with confidence = 1.0\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "print(f\"Detected language: {response[0]['language']} with confidence = {response[0]['score']}\")" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 11, 239 | "id": "a2b27d39", 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "body = [{\"text\": \"Salve. 
Benvenuti.\"}]" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 12, 249 | "id": "65f73db3", 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": [ 256 | "\u001b[1;31;34m\n", 257 | "[\n", 258 | " {\n", 259 | " \"isTranslationSupported\": true,\n", 260 | " \"isTransliterationSupported\": false,\n", 261 | " \"language\": \"it\",\n", 262 | " \"score\": 1.0\n", 263 | " }\n", 264 | "]\n" 265 | ] 266 | } 267 | ], 268 | "source": [ 269 | "request = requests.post(constructed_url, headers=headers, json=body)\n", 270 | "response = request.json()\n", 271 | "\n", 272 | "print(\"\\033[1;31;34m\")\n", 273 | "print(\n", 274 | " json.dumps(\n", 275 | " response, sort_keys=True, indent=4, ensure_ascii=False, separators=(\",\", \": \")\n", 276 | " )\n", 277 | ")" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 13, 283 | "id": "99ecdae8", 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "name": "stdout", 288 | "output_type": "stream", 289 | "text": [ 290 | "Detected language: it with confidence = 1.0\n" 291 | ] 292 | } 293 | ], 294 | "source": [ 295 | "print(f\"Detected language: {response[0]['language']} with confidence = {response[0]['score']}\")" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 14, 301 | "id": "4b7c9628", 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "body = [{\"text\": \"مَسَاءُ الْخَيْرْ\"}]" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 15, 311 | "id": "4b7bea6e", 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "name": "stdout", 316 | "output_type": "stream", 317 | "text": [ 318 | "\u001b[1;31;34m\n", 319 | "[\n", 320 | " {\n", 321 | " \"isTranslationSupported\": true,\n", 322 | " \"isTransliterationSupported\": true,\n", 323 | " \"language\": \"ar\",\n", 324 | " \"score\": 0.96\n", 325 | " }\n", 326 | "]\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 
331 | "request = requests.post(constructed_url, headers=headers, json=body)\n", 332 | "response = request.json()\n", 333 | "\n", 334 | "print(\"\\033[1;31;34m\")\n", 335 | "print(\n", 336 | " json.dumps(\n", 337 | " response, sort_keys=True, indent=4, ensure_ascii=False, separators=(\",\", \": \")\n", 338 | " )\n", 339 | ")" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 16, 345 | "id": "a2ff7051", 346 | "metadata": {}, 347 | "outputs": [ 348 | { 349 | "name": "stdout", 350 | "output_type": "stream", 351 | "text": [ 352 | "Detected language: ar with confidence = 0.96\n" 353 | ] 354 | } 355 | ], 356 | "source": [ 357 | "print(f\"Detected language: {response[0]['language']} with confidence = {response[0]['score']}\")" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 17, 363 | "id": "e4aaf80a", 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "body = [{\"text\": \"안녕하세요\"}]" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 18, 373 | "id": "2525505a", 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "\u001b[1;31;34m\n", 381 | "[\n", 382 | " {\n", 383 | " \"isTranslationSupported\": true,\n", 384 | " \"isTransliterationSupported\": true,\n", 385 | " \"language\": \"ko\",\n", 386 | " \"score\": 1.0\n", 387 | " }\n", 388 | "]\n" 389 | ] 390 | } 391 | ], 392 | "source": [ 393 | "request = requests.post(constructed_url, headers=headers, json=body)\n", 394 | "response = request.json()\n", 395 | "\n", 396 | "print(\"\\033[1;31;34m\")\n", 397 | "print(\n", 398 | " json.dumps(\n", 399 | " response, sort_keys=True, indent=4, ensure_ascii=False, separators=(\",\", \": \")\n", 400 | " )\n", 401 | ")" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 19, 407 | "id": "e588561c", 408 | "metadata": {}, 409 | "outputs": [ 410 | { 411 | "name": "stdout", 412 | "output_type": 
"stream", 413 | "text": [ 414 | "Detected language: ko with confidence = 1.0\n" 415 | ] 416 | } 417 | ], 418 | "source": [ 419 | "print(f\"Detected language: {response[0]['language']} with confidence = {response[0]['score']}\")" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "id": "48eeb707", 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [] 429 | } 430 | ], 431 | "metadata": { 432 | "kernelspec": { 433 | "display_name": "Python 3.10 - SDK v2", 434 | "language": "python", 435 | "name": "python310-sdkv2" 436 | }, 437 | "language_info": { 438 | "codemirror_mode": { 439 | "name": "ipython", 440 | "version": 3 441 | }, 442 | "file_extension": ".py", 443 | "mimetype": "text/x-python", 444 | "name": "python", 445 | "nbconvert_exporter": "python", 446 | "pygments_lexer": "ipython3", 447 | "version": "3.10.14" 448 | } 449 | }, 450 | "nbformat": 4, 451 | "nbformat_minor": 5 452 | } 453 | -------------------------------------------------------------------------------- /5 Document translation batch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "313da351", 6 | "metadata": {}, 7 | "source": [ 8 | "# Document translation batch (asynchronous mode)\n", 9 | "\n", 10 | "https://learn.microsoft.com/en-us/azure/ai-services/translator/document-translation/how-to-guides/use-rest-api-programmatically?tabs=csharp\n", 11 | "\n", 12 | "" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "id": "0e17c0d3", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import datetime\n", 23 | "import json\n", 24 | "import os\n", 25 | "import requests\n", 26 | "import sys\n", 27 | "import time\n", 28 | "\n", 29 | "from azure.ai.translation.document import DocumentTranslationClient\n", 30 | "from azure.core.credentials import AzureKeyCredential\n", 31 | "from azure.storage.blob import 
BlobServiceClient\n", 32 | "from dotenv import load_dotenv" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "id": "326398ad", 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/plain": [ 44 | "'3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]'" 45 | ] 46 | }, 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | } 51 | ], 52 | "source": [ 53 | "sys.version" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "id": "5e96ffdb", 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "Today is 05-Mar-2025 11:17:25\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "print(f\"Today is {datetime.datetime.today().strftime('%d-%b-%Y %H:%M:%S')}\")" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "id": "e8f284bc", 77 | "metadata": {}, 78 | "source": [ 79 | "## Azure AI Translator credentials" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "id": "6dd102a2", 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "load_dotenv(\"azure.env\")\n", 90 | "\n", 91 | "azure_ai_translator_key = os.getenv(\"AZURE_AI_TRANSLATION_KEY\")\n", 92 | "azure_ai_translator_endpoint = os.getenv(\"AZURE_AI_TRANSLATION_ENDPOINTDOCUMENT\")\n", 93 | "azure_ai_translator_region = os.getenv(\"AZURE_AI_TRANSLATION_REGION\")\n", 94 | "\n", 95 | "blob_results_connection_string = os.getenv(\"connection_string\")" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "id": "ac11c907", 101 | "metadata": {}, 102 | "source": [ 103 | "## Shared Access Signatures (SAS)\n", 104 | "\n", 105 | "In order to read and write from your blobs in your Azure storage account you need to do these steps:

\n", 106 | "1 Go to the container from your storage account
\n", 107 | "2 Go to **\"Settings / Shared Access Tokens\"**
\n", 108 | "3 Check the **\"permissions\"**
\n", 109 | "4 Check the **\"expiry\"** information
\n", 110 | "5 Then click on **\"Generate SAS Token and URL\".**
\n", 111 | "6 You will have your **\"Blob SAS token\"** and the **\"Blob SAS URL\"**.
\n", 112 | "\n", 113 | "- Below we have two blob storage containers (one for the source documents, another one for the translated documents).\n", 114 | "- For the source blob storage you should define the **\"Read\"** and **\"List\"** permissions (step 3).\n", 115 | "- For the results blob storage you should define the **\"Write\"** and **\"List\"** permissions (step 3)." 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "id": "94645048", 121 | "metadata": {}, 122 | "source": [ 123 | "" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 5, 129 | "id": "5f8d1884", 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "# Source blob SAS\n", 134 | "sourceblobsasurl = os.getenv(\"sourceblobsasurl\")\n", 135 | "sourceblobsastoken = os.getenv(\"sourceblobsastoken\")\n", 136 | "targetblobsasurl = os.getenv(\"targetblobsasurl\")\n", 137 | "targetblobsastoken = os.getenv(\"targetblobsastoken\")\n", 138 | "\n", 139 | "sourceUri = sourceblobsasurl + sourceblobsastoken\n", 140 | "targetUri = targetblobsasurl + targetblobsastoken" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "id": "33abd56a", 146 | "metadata": {}, 147 | "source": [ 148 | "## Azure AI Translator client" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 6, 154 | "id": "bfdd6495", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "# initialize a new instance of the DocumentTranslationClient object to interact with the Document Translation feature\n", 159 | "translator_client = DocumentTranslationClient(azure_ai_translator_endpoint, AzureKeyCredential(azure_ai_translator_key))" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "2c7ce4d7", 165 | "metadata": {}, 166 | "source": [ 167 | "## Example" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 7, 173 | "id": "4178533b", 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "targetLanguage = \"fr\"" 178 | ] 
179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "id": "3f4aace0", 183 | "metadata": {}, 184 | "source": [ 185 | "> https://aka.ms/TranslatorLanguageCodes " 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 8, 191 | "id": "59145268", 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "Starting to translate the documents to language:fr ...\n", 199 | "\n", 200 | "Elapsed time: 00:01:00.442654\n" 201 | ] 202 | } 203 | ], 204 | "source": [ 205 | "# include source and target locations and target language code for the begin translation operation\n", 206 | "print(\n", 207 | " f\"Starting to translate the documents to language:{targetLanguage} ...\")\n", 208 | "\n", 209 | "start = time.time()\n", 210 | "\n", 211 | "poller = translator_client.begin_translation(sourceUri, targetUri, targetLanguage)\n", 212 | "result = poller.result()\n", 213 | "\n", 214 | "elapsed = time.time() - start\n", 215 | "print(\"\\nElapsed time: \" + time.strftime(\"%H:%M:%S.{}\".format(str(elapsed %\n", 216 | " 1)[2:])[:15], time.gmtime(elapsed)))" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "id": "2d22d0e2", 222 | "metadata": {}, 223 | "source": [ 224 | "## Results" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 9, 230 | "id": "1bf61768", 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "name": "stdout", 235 | "output_type": "stream", 236 | "text": [ 237 | "\u001b[1;31;34m\n", 238 | "Status: Succeeded\n", 239 | "Created on: 2025-03-05 11:17:26.188541+00:00\n", 240 | "Last updated on: 2025-03-05 11:17:49.795860+00:00\n", 241 | "\n", 242 | "Total number of translations on documents: 4\n", 243 | " 0 failed\n", 244 | " 4 succeeded\n" 245 | ] 246 | } 247 | ], 248 | "source": [ 249 | "print(\"\\033[1;31;34m\")\n", 250 | "print(\"Status: {}\".format(poller.status()))\n", 251 | "print(\"Created on: {}\".format(poller.details.created_on))\n", 252 | 
"print(\"Last updated on: {}\".format(poller.details.last_updated_on))\n", 253 | "print(\n", 254 | " \"\\nTotal number of translations on documents: {}\".format(\n", 255 | " poller.details.documents_total_count\n", 256 | " )\n", 257 | ")\n", 258 | "\n", 259 | "print(\" {} failed\".format(poller.details.documents_failed_count))\n", 260 | "print(\" {} succeeded\".format(poller.details.documents_succeeded_count))" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 10, 266 | "id": "8529bcda", 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "- Document 1\n", 274 | "Document ID: 009bb304-0000-0000-0000-000000000000\n", 275 | "Document status: Succeeded\n", 276 | "Source document location: https://azurestorageaccountsr.blob.core.windows.net:443/docs/termsconditions_en.pdf\n", 277 | "Translated document location: https://azurestorageaccountsr.blob.core.windows.net:443/docs-translated/termsconditions_en.pdf\n", 278 | "Translated to language: fr\n", 279 | "\n", 280 | "- Document 2\n", 281 | "Document ID: 009bb303-0000-0000-0000-000000000000\n", 282 | "Document status: Succeeded\n", 283 | "Source document location: https://azurestorageaccountsr.blob.core.windows.net:443/docs/releasenotes_en.pdf\n", 284 | "Translated document location: https://azurestorageaccountsr.blob.core.windows.net:443/docs-translated/releasenotes_en.pdf\n", 285 | "Translated to language: fr\n", 286 | "\n", 287 | "- Document 3\n", 288 | "Document ID: 009bb302-0000-0000-0000-000000000000\n", 289 | "Document status: Succeeded\n", 290 | "Source document location: https://azurestorageaccountsr.blob.core.windows.net:443/docs/Microsoft%20Translator%20Customer%20Ready%20Deck%20v4.1.pptx\n", 291 | "Translated document location: https://azurestorageaccountsr.blob.core.windows.net:443/docs-translated/Microsoft%20Translator%20Customer%20Ready%20Deck%20v4.1.pptx\n", 292 | "Translated to language: fr\n", 293 | "\n", 
294 | "- Document 4\n", 295 | "Document ID: 009bb301-0000-0000-0000-000000000000\n", 296 | "Document status: Succeeded\n", 297 | "Source document location: https://azurestorageaccountsr.blob.core.windows.net:443/docs/AutoGen%20A%20Multi-Agent%20Framework%20for%20Enabling%20Next-Gen%20AI%20Applications.msg\n", 298 | "Translated document location: https://azurestorageaccountsr.blob.core.windows.net:443/docs-translated/AutoGen%20A%20Multi-Agent%20Framework%20for%20Enabling%20Next-Gen%20AI%20Applications.msg\n", 299 | "Translated to language: fr\n", 300 | "\n" 301 | ] 302 | } 303 | ], 304 | "source": [ 305 | "nb = 1\n", 306 | "\n", 307 | "for document in result:\n", 308 | " print(\"- Document\", nb)\n", 309 | " print(\"Document ID: {}\".format(document.id))\n", 310 | " print(\"Document status: {}\".format(document.status))\n", 311 | "\n", 312 | " if document.status == \"Succeeded\":\n", 313 | " print(\"Source document location: {}\".format(\n", 314 | " document.source_document_url))\n", 315 | " print(\n", 316 | " \"Translated document location: {}\".format(\n", 317 | " document.translated_document_url)\n", 318 | " )\n", 319 | " print(\"Translated to language: {}\\n\".format(document.translated_to))\n", 320 | " nb += 1" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "id": "1e2ebd87", 326 | "metadata": {}, 327 | "source": [ 328 | "## Translated files" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 11, 334 | "id": "ceb3b4b9", 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "name": "stdout", 339 | "output_type": "stream", 340 | "text": [ 341 | "\u001b[1;31;34m\n", 342 | "1 Document: AutoGen A Multi-Agent Framework for Enabling Next-Gen AI Applications.msg\n", 343 | "Date: 2025-03-05 11:17:49\n", 344 | "Size: 128512 bytes (0.12 MB)\n", 345 | "\n", 346 | "2 Document: Microsoft Translator Customer Ready Deck v4.1.pptx\n", 347 | "Date: 2025-03-05 11:17:57\n", 348 | "Size: 16973181 bytes (16.19 MB)\n", 349 | "\n", 350 | "3 
Document: releasenotes_en.pdf\n", 351 | "Date: 2025-03-05 11:17:55\n", 352 | "Size: 257886 bytes (0.25 MB)\n", 353 | "\n", 354 | "4 Document: termsconditions_en.pdf\n", 355 | "Date: 2025-03-05 11:17:57\n", 356 | "Size: 387251 bytes (0.37 MB)\n", 357 | "\n" 358 | ] 359 | } 360 | ], 361 | "source": [ 362 | "container_name = \"docs-translated\"\n", 363 | "\n", 364 | "blob_service_client = BlobServiceClient.from_connection_string(blob_results_connection_string)\n", 365 | "container_client = blob_service_client.get_container_client(container_name)\n", 366 | "fileslist = container_client.list_blobs()\n", 367 | "\n", 368 | "nb = 0\n", 369 | "print(\"\\033[1;31;34m\")\n", 370 | "\n", 371 | "for file in fileslist:\n", 372 | " print(f\"{nb+1} Document: {file.name}\")\n", 373 | " print(f\"Date: {file.last_modified.strftime('%Y-%m-%d %H:%M:%S')}\")\n", 374 | " size_mb = file.size / (1024 * 1024)\n", 375 | " print(f\"Size: {file.size} bytes ({size_mb:.2f} MB)\\n\")\n", 376 | " nb += 1" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 12, 382 | "id": "9afaf5d9", 383 | "metadata": {}, 384 | "outputs": [ 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | "Total number of translated files = 4\n" 390 | ] 391 | } 392 | ], 393 | "source": [ 394 | "print(f\"Total number of translated files = {nb}\")" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "id": "2d0f6780", 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [] 404 | } 405 | ], 406 | "metadata": { 407 | "kernelspec": { 408 | "display_name": "Python 3.10 - SDK v2", 409 | "language": "python", 410 | "name": "python310-sdkv2" 411 | }, 412 | "language_info": { 413 | "codemirror_mode": { 414 | "name": "ipython", 415 | "version": 3 416 | }, 417 | "file_extension": ".py", 418 | "mimetype": "text/x-python", 419 | "name": "python", 420 | "nbconvert_exporter": "python", 421 | "pygments_lexer": "ipython3", 422 | "version": 
"3.10.14" 423 | } 424 | }, 425 | "nbformat": 4, 426 | "nbformat_minor": 5 427 | } 428 | -------------------------------------------------------------------------------- /6 Synchronous Document Translation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "d32b45c0", 6 | "metadata": {}, 7 | "source": [ 8 | "# Synchronous Document Translation\n", 9 | "https://learn.microsoft.com/en-us/azure/ai-services/translator/document-translation/quickstarts/synchronous-rest-api\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "9956e5e4", 15 | "metadata": {}, 16 | "source": [ 17 | "" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "1fd8bf2f", 23 | "metadata": {}, 24 | "source": [ 25 | "> https://aka.ms/TranslatorLanguageCodes " 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "id": "076c88fc", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import datetime\n", 36 | "import gradio as gr\n", 37 | "import os\n", 38 | "import pandas as pd\n", 39 | "import requests\n", 40 | "import sys\n", 41 | "import time\n", 42 | "\n", 43 | "from azure.ai.translation.text import TextTranslationClient\n", 44 | "from azure.core.credentials import AzureKeyCredential\n", 45 | "from dotenv import load_dotenv\n", 46 | "from IPython.display import FileLink" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "id": "b93eb65f", 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "'3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]'" 59 | ] 60 | }, 61 | "execution_count": 2, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "sys.version" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 3, 73 | "id": "6a8a50d3", 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": 
"stream", 79 | "text": [ 80 | "Today is 05-Mar-2025 11:18:50\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "print(f\"Today is {datetime.datetime.today().strftime('%d-%b-%Y %H:%M:%S')}\")" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": "dc4937f9", 91 | "metadata": {}, 92 | "source": [ 93 | "## Azure AI Translator credentials" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "id": "fb379d08", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "load_dotenv(\"azure.env\")\n", 104 | "\n", 105 | "azure_ai_translator_key = os.getenv(\"AZURE_AI_TRANSLATION_KEY\")\n", 106 | "azure_ai_translator_endpoint = os.getenv(\"AZURE_AI_TRANSLATION_ENDPOINTDOCUMENT\")\n", 107 | "azure_ai_translator_region = os.getenv(\"AZURE_AI_TRANSLATION_REGION\")" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 5, 113 | "id": "6350f45f", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "source_dir = \"source\"\n", 118 | "target_dir = \"translated\"\n", 119 | "\n", 120 | "os.makedirs(source_dir, exist_ok=True)\n", 121 | "os.makedirs(target_dir, exist_ok=True)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "id": "89435185", 127 | "metadata": {}, 128 | "source": [ 129 | "## Function" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 6, 135 | "id": "746e46c8", 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "def azure_ai_translator(input_file, sourceLanguage, targetLanguage):\n", 140 | " \"\"\"\n", 141 | " Document translation\n", 142 | " \"\"\"\n", 143 | " start = time.time()\n", 144 | " print(f\"Translating the document from {sourceLanguage} to {targetLanguage} ...\")\n", 145 | "\n", 146 | " params = {\n", 147 | " \"sourceLanguage\": sourceLanguage,\n", 148 | " \"targetLanguage\": targetLanguage,\n", 149 | " \"api-version\": \"2023-11-01-preview\",\n", 150 | " }\n", 151 | " \n", 152 | " path = \"translator/document:translate\"\n", 
153 | " url = azure_ai_translator_endpoint + path\n", 154 | "\n", 155 | " headers = {\"Ocp-Apim-Subscription-Key\": azure_ai_translator_key}\n", 156 | " \n", 157 | " with open(input_file, \"rb\") as document:\n", 158 | " # Define the data to be sent\n", 159 | " # Find list of supported content types here: https://aka.ms/dtsync-content-type\n", 160 | " data = {\n", 161 | " \"document\": (os.path.basename(input_file),\n", 162 | " document,\n", 163 | " \"application/vnd.openxmlformats-officedocument.wordprocessingml.document\")\n", 164 | " }\n", 165 | "\n", 166 | " # Send the POST request\n", 167 | " response = requests.post(url, headers=headers, files=data, params=params)\n", 168 | "\n", 169 | " # Write the response content to a file\n", 170 | " with open(output_file, \"wb\") as output_document:\n", 171 | " output_document.write(response.content)\n", 172 | "\n", 173 | " elapsed = time.time() - start\n", 174 | " print(\"\\nDone. Elapsed time: \" + time.strftime(\"%H:%M:%S.{}\".format(str(elapsed %\n", 175 | " 1)[2:])[:15], time.gmtime(elapsed)))" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "id": "d047662e", 181 | "metadata": {}, 182 | "source": [ 183 | "## Test 1" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 7, 189 | "id": "a0735ea0", 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "sourceLanguage = \"en\"\n", 194 | "targetLanguage = \"fr\"" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 8, 200 | "id": "6211f8a5", 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "input_file = os.path.join(source_dir, \"worddocument.docx\")\n", 205 | "output_file = os.path.join(target_dir, \"worddocument_translated_\" + targetLanguage + \".docx\")" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 9, 211 | "id": "c53dc6d0", 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/html": [ 217 | "source/worddocument.docx
" 218 | ], 219 | "text/plain": [ 220 | "/mnt/batch/tasks/shared/LS_root/mounts/clusters/seretkow8/code/Users/seretkow/Azure AI Translator/source/worddocument.docx" 221 | ] 222 | }, 223 | "execution_count": 9, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "source_link = FileLink(path=os.path.join(\n", 230 | " source_dir, os.path.basename(input_file)))\n", 231 | "source_link" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 10, 237 | "id": "ee63c4a3", 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "Translating the document from en to fr ...\n", 245 | "\n", 246 | "Done. Elapsed time: 00:00:01.063865\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "azure_ai_translator(input_file, sourceLanguage, targetLanguage)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 11, 257 | "id": "8a975ae5", 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "data": { 262 | "text/html": [ 263 | "translated/worddocument_translated_fr.docx
" 264 | ], 265 | "text/plain": [ 266 | "/mnt/batch/tasks/shared/LS_root/mounts/clusters/seretkow8/code/Users/seretkow/Azure AI Translator/translated/worddocument_translated_fr.docx" 267 | ] 268 | }, 269 | "execution_count": 11, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "translated_link = FileLink(path=os.path.join(\n", 276 | " target_dir, os.path.basename(output_file)))\n", 277 | "translated_link" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "id": "efd2d5a5", 283 | "metadata": {}, 284 | "source": [ 285 | "## Test 2" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 12, 291 | "id": "eb5ae4d8", 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "sourceLanguage = \"en\"\n", 296 | "targetLanguage = \"it\"" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 13, 302 | "id": "10990c8a", 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "input_file = os.path.join(source_dir, \"worddocument.docx\")\n", 307 | "output_file = os.path.join(target_dir, \"worddocument_translated_\" + targetLanguage + \".docx\")" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 14, 313 | "id": "226d0b33", 314 | "metadata": {}, 315 | "outputs": [ 316 | { 317 | "name": "stdout", 318 | "output_type": "stream", 319 | "text": [ 320 | "Translating the document from en to it ...\n", 321 | "\n", 322 | "Done. Elapsed time: 00:00:00.649152\n" 323 | ] 324 | } 325 | ], 326 | "source": [ 327 | "azure_ai_translator(input_file, sourceLanguage, targetLanguage)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 15, 333 | "id": "850df4b8", 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "data": { 338 | "text/html": [ 339 | "translated/worddocument_translated_it.docx
" 340 | ], 341 | "text/plain": [ 342 | "/mnt/batch/tasks/shared/LS_root/mounts/clusters/seretkow8/code/Users/seretkow/Azure AI Translator/translated/worddocument_translated_it.docx" 343 | ] 344 | }, 345 | "execution_count": 15, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "translated_link = FileLink(path=os.path.join(\n", 352 | " target_dir, os.path.basename(output_file)))\n", 353 | "translated_link" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "id": "21ac8bea", 359 | "metadata": {}, 360 | "source": [ 361 | "## Test 3" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 16, 367 | "id": "ece8e6c6", 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "sourceLanguage = \"en\"\n", 372 | "targetLanguage = \"ar\"" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 17, 378 | "id": "57238ffb", 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "input_file = os.path.join(source_dir, \"worddocument.docx\")\n", 383 | "output_file = os.path.join(target_dir, \"worddocument_translated_\" + targetLanguage + \".docx\")" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 18, 389 | "id": "3867f664", 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "Translating the document from en to ar ...\n", 397 | "\n", 398 | "Done. Elapsed time: 00:00:00.732519\n" 399 | ] 400 | } 401 | ], 402 | "source": [ 403 | "azure_ai_translator(input_file, sourceLanguage, targetLanguage)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 19, 409 | "id": "bfe308ae", 410 | "metadata": {}, 411 | "outputs": [ 412 | { 413 | "data": { 414 | "text/html": [ 415 | "translated/worddocument_translated_ar.docx
" 416 | ], 417 | "text/plain": [ 418 | "/mnt/batch/tasks/shared/LS_root/mounts/clusters/seretkow8/code/Users/seretkow/Azure AI Translator/translated/worddocument_translated_ar.docx" 419 | ] 420 | }, 421 | "execution_count": 19, 422 | "metadata": {}, 423 | "output_type": "execute_result" 424 | } 425 | ], 426 | "source": [ 427 | "translated_link = FileLink(path=os.path.join(\n", 428 | " target_dir, os.path.basename(output_file)))\n", 429 | "translated_link" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "id": "d532a72b", 435 | "metadata": {}, 436 | "source": [ 437 | "## Test 4" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 20, 443 | "id": "10c2a6dd", 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "sourceLanguage = \"en\"\n", 448 | "targetLanguage = \"zh-Hans\"" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 21, 454 | "id": "ada8a706", 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "input_file = os.path.join(source_dir, \"worddocument.docx\")\n", 459 | "output_file = os.path.join(target_dir, \"worddocument_translated_\" + targetLanguage + \".docx\")" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 22, 465 | "id": "e9c953f3", 466 | "metadata": {}, 467 | "outputs": [ 468 | { 469 | "name": "stdout", 470 | "output_type": "stream", 471 | "text": [ 472 | "Translating the document from en to zh-Hans ...\n", 473 | "\n", 474 | "Done. Elapsed time: 00:00:00.620569\n" 475 | ] 476 | } 477 | ], 478 | "source": [ 479 | "azure_ai_translator(input_file, sourceLanguage, targetLanguage)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 23, 485 | "id": "636598ce", 486 | "metadata": {}, 487 | "outputs": [ 488 | { 489 | "data": { 490 | "text/html": [ 491 | "translated/worddocument_translated_zh-Hans.docx
" 492 | ], 493 | "text/plain": [ 494 | "/mnt/batch/tasks/shared/LS_root/mounts/clusters/seretkow8/code/Users/seretkow/Azure AI Translator/translated/worddocument_translated_zh-Hans.docx" 495 | ] 496 | }, 497 | "execution_count": 23, 498 | "metadata": {}, 499 | "output_type": "execute_result" 500 | } 501 | ], 502 | "source": [ 503 | "translated_link = FileLink(path=os.path.join(\n", 504 | " target_dir, os.path.basename(output_file)))\n", 505 | "translated_link" 506 | ] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "id": "22cab5a7", 511 | "metadata": {}, 512 | "source": [ 513 | "## Results" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": 24, 519 | "id": "6cf8824f", 520 | "metadata": {}, 521 | "outputs": [ 522 | { 523 | "name": "stdout", 524 | "output_type": "stream", 525 | "text": [ 526 | "total 161K\n", 527 | "-rwxrwxrwx 1 root root 41K Mar 5 11:18 worddocument_translated_ar.docx\n", 528 | "-rwxrwxrwx 1 root root 40K Mar 5 11:18 worddocument_translated_fr.docx\n", 529 | "-rwxrwxrwx 1 root root 40K Mar 5 11:18 worddocument_translated_it.docx\n", 530 | "-rwxrwxrwx 1 root root 41K Mar 5 11:18 worddocument_translated_zh-Hans.docx\n" 531 | ] 532 | } 533 | ], 534 | "source": [ 535 | "!ls $target_dir -lh" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "id": "2d7835c3", 542 | "metadata": {}, 543 | "outputs": [], 544 | "source": [] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "id": "15bd10f7", 550 | "metadata": {}, 551 | "outputs": [], 552 | "source": [] 553 | } 554 | ], 555 | "metadata": { 556 | "kernelspec": { 557 | "display_name": "Python 3.10 - SDK v2", 558 | "language": "python", 559 | "name": "python310-sdkv2" 560 | }, 561 | "language_info": { 562 | "codemirror_mode": { 563 | "name": "ipython", 564 | "version": 3 565 | }, 566 | "file_extension": ".py", 567 | "mimetype": "text/x-python", 568 | "name": "python", 569 | "nbconvert_exporter": "python", 570 | 
"pygments_lexer": "ipython3", 571 | "version": "3.10.14" 572 | } 573 | }, 574 | "nbformat": 4, 575 | "nbformat_minor": 5 576 | } 577 | -------------------------------------------------------------------------------- /4 Translation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "69571d21", 6 | "metadata": {}, 7 | "source": [ 8 | "# Translation with Azure AI Translator\n", 9 | "\n", 10 | "https://learn.microsoft.com/en-us/azure/ai-services/translator/text-translation-overview" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "fdfb04c7", 16 | "metadata": {}, 17 | "source": [ 18 | "" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "id": "5b1ec5fd", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import datetime\n", 29 | "import gradio as gr\n", 30 | "import pandas as pd\n", 31 | "import sys\n", 32 | "\n", 33 | "from azure.ai.translation.text import TextTranslationClient\n", 34 | "from azure.core.credentials import AzureKeyCredential\n", 35 | "from azure.ai.translation.text.models import InputTextItem\n", 36 | "from azure.core.exceptions import HttpResponseError\n", 37 | "from dotenv import load_dotenv" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "id": "ca2fe57d", 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "'3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]'" 50 | ] 51 | }, 52 | "execution_count": 2, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "sys.version" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "id": "18158f38", 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "Today is 05-Mar-2025 11:14:13\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "print(f\"Today is 
{datetime.datetime.today().strftime('%d-%b-%Y %H:%M:%S')}\")" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "id": "6ebab494", 82 | "metadata": {}, 83 | "source": [ 84 | "## Azure AI Translator credentials" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 4, 90 | "id": "6ed00739", 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "load_dotenv(\"azure.env\")\n", 95 | "\n", 96 | "azure_ai_translator_key = os.getenv(\"AZURE_AI_TRANSLATION_KEY\")\n", 97 | "azure_ai_translator_region = os.getenv(\"AZURE_AI_TRANSLATION_REGION\")" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "id": "fd76cc61", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "credential = AzureKeyCredential(azure_ai_translator_key)\n", 108 | "\n", 109 | "text_translator = TextTranslationClient(\n", 110 | " credential=credential,\n", 111 | " region=azure_ai_translator_region\n", 112 | ")" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 6, 118 | "id": "a9a16b1c-40b2-4223-ad7b-17224049f504", 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "response = text_translator.get_supported_languages()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 7, 128 | "id": "51b88bbf-191e-42e6-a720-a5fc129cb5c0", 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "def print_languages(label, languages):\n", 133 | " \"\"\"\n", 134 | " Print supported languages of Azure AI Translator\n", 135 | " \"\"\"\n", 136 | " print(\"\\033[1;31;34m\")\n", 137 | " \n", 138 | " if languages is not None:\n", 139 | " print(f\"Number of supported {label} languages = {len(languages)}\\n\")\n", 140 | " print(f\"{label.capitalize()} languages:\")\n", 141 | "\n", 142 | " for idx, (key, value) in enumerate(languages.items(), start=1):\n", 143 | " print(f\"{idx:03}\\t{key:10} {value.name} ({value.native_name})\")\n", 144 | "\n", 145 | " else:\n", 146 | " 
print(f\"No supported {label} languages.\")" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 8, 152 | "id": "7fb02043-220d-48e0-a4a4-3fdf158b36da", 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "\u001b[1;31;34m\n", 160 | "Number of supported translation languages = 135\n", 161 | "\n", 162 | "Translation languages:\n", 163 | "001\taf Afrikaans (Afrikaans)\n", 164 | "002\tam Amharic (አማርኛ)\n", 165 | "003\tar Arabic (العربية)\n", 166 | "004\tas Assamese (অসমীয়া)\n", 167 | "005\taz Azerbaijani (Azərbaycan)\n", 168 | "006\tba Bashkir (Bashkir)\n", 169 | "007\tbg Bulgarian (Български)\n", 170 | "008\tbho Bhojpuri (भोजपुरी)\n", 171 | "009\tbn Bangla (বাংলা)\n", 172 | "010\tbo Tibetan (བོད་སྐད་)\n", 173 | "011\tbrx Bodo (बड़ो)\n", 174 | "012\tbs Bosnian (Bosanski)\n", 175 | "013\tca Catalan (Català)\n", 176 | "014\tcs Czech (Čeština)\n", 177 | "015\tcy Welsh (Cymraeg)\n", 178 | "016\tda Danish (Dansk)\n", 179 | "017\tde German (Deutsch)\n", 180 | "018\tdoi Dogri (डोगरी)\n", 181 | "019\tdsb Lower Sorbian (Dolnoserbšćina)\n", 182 | "020\tdv Divehi (ދިވެހިބަސް)\n", 183 | "021\tel Greek (Ελληνικά)\n", 184 | "022\ten English (English)\n", 185 | "023\tes Spanish (Español)\n", 186 | "024\tet Estonian (Eesti)\n", 187 | "025\teu Basque (Euskara)\n", 188 | "026\tfa Persian (فارسی)\n", 189 | "027\tfi Finnish (Suomi)\n", 190 | "028\tfil Filipino (Filipino)\n", 191 | "029\tfj Fijian (Na Vosa Vakaviti)\n", 192 | "030\tfo Faroese (Føroyskt)\n", 193 | "031\tfr French (Français)\n", 194 | "032\tfr-CA French (Canada) (Français (Canada))\n", 195 | "033\tga Irish (Gaeilge)\n", 196 | "034\tgl Galician (Galego)\n", 197 | "035\tgom Konkani (कोंकणी)\n", 198 | "036\tgu Gujarati (ગુજરાતી)\n", 199 | "037\tha Hausa (Hausa)\n", 200 | "038\the Hebrew (עברית)\n", 201 | "039\thi Hindi (हिन्दी)\n", 202 | "040\thne Chhattisgarhi (छत्तीसगढ़ी)\n", 203 | "041\thr Croatian (Hrvatski)\n", 204 | 
"042\thsb Upper Sorbian (Hornjoserbšćina)\n", 205 | "043\tht Haitian Creole (Haitian Creole)\n", 206 | "044\thu Hungarian (Magyar)\n", 207 | "045\thy Armenian (Հայերեն)\n", 208 | "046\tid Indonesian (Indonesia)\n", 209 | "047\tig Igbo (Ásụ̀sụ́ Ìgbò)\n", 210 | "048\tikt Inuinnaqtun (Inuinnaqtun)\n", 211 | "049\tis Icelandic (Íslenska)\n", 212 | "050\tit Italian (Italiano)\n", 213 | "051\tiu Inuktitut (ᐃᓄᒃᑎᑐᑦ)\n", 214 | "052\tiu-Latn Inuktitut (Latin) (Inuktitut (Latin))\n", 215 | "053\tja Japanese (日本語)\n", 216 | "054\tka Georgian (ქართული)\n", 217 | "055\tkk Kazakh (Қазақ Тілі)\n", 218 | "056\tkm Khmer (ខ្មែរ)\n", 219 | "057\tkmr Kurdish (Northern) (Kurdî (Bakur))\n", 220 | "058\tkn Kannada (ಕನ್ನಡ)\n", 221 | "059\tko Korean (한국어)\n", 222 | "060\tks Kashmiri (کٲشُر)\n", 223 | "061\tku Kurdish (Central) (Kurdî (Navîn))\n", 224 | "062\tky Kyrgyz (Кыргызча)\n", 225 | "063\tln Lingala (Lingála)\n", 226 | "064\tlo Lao (ລາວ)\n", 227 | "065\tlt Lithuanian (Lietuvių)\n", 228 | "066\tlug Ganda (Ganda)\n", 229 | "067\tlv Latvian (Latviešu)\n", 230 | "068\tlzh Chinese (Literary) (中文 (文言文))\n", 231 | "069\tmai Maithili (मैथिली)\n", 232 | "070\tmg Malagasy (Malagasy)\n", 233 | "071\tmi Māori (Te Reo Māori)\n", 234 | "072\tmk Macedonian (Македонски)\n", 235 | "073\tml Malayalam (മലയാളം)\n", 236 | "074\tmn-Cyrl Mongolian (Cyrillic) (Монгол)\n", 237 | "075\tmn-Mong Mongolian (Traditional) (ᠮᠣᠩᠭᠣᠯ ᠬᠡᠯᠡ)\n", 238 | "076\tmni Manipuri (ꯃꯩꯇꯩꯂꯣꯟ)\n", 239 | "077\tmr Marathi (मराठी)\n", 240 | "078\tms Malay (Melayu)\n", 241 | "079\tmt Maltese (Malti)\n", 242 | "080\tmww Hmong Daw (Hmong Daw)\n", 243 | "081\tmy Myanmar (Burmese) (မြန်မာ)\n", 244 | "082\tnb Norwegian (Norsk Bokmål)\n", 245 | "083\tne Nepali (नेपाली)\n", 246 | "084\tnl Dutch (Nederlands)\n", 247 | "085\tnso Sesotho sa Leboa (Sesotho sa Leboa)\n", 248 | "086\tnya Nyanja (Nyanja)\n", 249 | "087\tor Odia (ଓଡ଼ିଆ)\n", 250 | "088\totq Querétaro Otomi (Hñähñu)\n", 251 | "089\tpa Punjabi (ਪੰਜਾਬੀ)\n", 252 | "090\tpl Polish 
(Polski)\n", 253 | "091\tprs Dari (دری)\n", 254 | "092\tps Pashto (پښتو)\n", 255 | "093\tpt Portuguese (Brazil) (Português (Brasil))\n", 256 | "094\tpt-PT Portuguese (Portugal) (Português (Portugal))\n", 257 | "095\tro Romanian (Română)\n", 258 | "096\tru Russian (Русский)\n", 259 | "097\trun Rundi (Rundi)\n", 260 | "098\trw Kinyarwanda (Kinyarwanda)\n", 261 | "099\tsd Sindhi (سنڌي)\n", 262 | "100\tsi Sinhala (සිංහල)\n", 263 | "101\tsk Slovak (Slovenčina)\n", 264 | "102\tsl Slovenian (Slovenščina)\n", 265 | "103\tsm Samoan (Gagana Sāmoa)\n", 266 | "104\tsn Shona (chiShona)\n", 267 | "105\tso Somali (Soomaali)\n", 268 | "106\tsq Albanian (Shqip)\n", 269 | "107\tsr-Cyrl Serbian (Cyrillic) (Српски (ћирилица))\n", 270 | "108\tsr-Latn Serbian (Latin) (Srpski (latinica))\n", 271 | "109\tst Sesotho (Sesotho)\n", 272 | "110\tsv Swedish (Svenska)\n", 273 | "111\tsw Swahili (Kiswahili)\n", 274 | "112\tta Tamil (தமிழ்)\n", 275 | "113\tte Telugu (తెలుగు)\n", 276 | "114\tth Thai (ไทย)\n", 277 | "115\tti Tigrinya (ትግር)\n", 278 | "116\ttk Turkmen (Türkmen Dili)\n", 279 | "117\ttlh-Latn Klingon (Latin) (Klingon (Latin))\n", 280 | "118\ttlh-Piqd Klingon (pIqaD) (Klingon (pIqaD))\n", 281 | "119\ttn Setswana (Setswana)\n", 282 | "120\tto Tongan (Lea Fakatonga)\n", 283 | "121\ttr Turkish (Türkçe)\n", 284 | "122\ttt Tatar (Татар)\n", 285 | "123\tty Tahitian (Reo Tahiti)\n", 286 | "124\tug Uyghur (ئۇيغۇرچە)\n", 287 | "125\tuk Ukrainian (Українська)\n", 288 | "126\tur Urdu (اردو)\n", 289 | "127\tuz Uzbek (Latin) (O‘Zbek)\n", 290 | "128\tvi Vietnamese (Tiếng Việt)\n", 291 | "129\txh Xhosa (isiXhosa)\n", 292 | "130\tyo Yoruba (Èdè Yorùbá)\n", 293 | "131\tyua Yucatec Maya (Yucatec Maya)\n", 294 | "132\tyue Cantonese (Traditional) (粵語 (繁體))\n", 295 | "133\tzh-Hans Chinese Simplified (中文 (简体))\n", 296 | "134\tzh-Hant Chinese Traditional (繁體中文 (繁體))\n", 297 | "135\tzu Zulu (Isi-Zulu)\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "print_languages(\"translation\", response.translation)" 
303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 9, 308 | "id": "14f0b8a7-c2c0-457d-8d69-5edc31839831", 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/html": [ 314 | "
\n", 315 | "\n", 328 | "\n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | "
Language_CodeLanguage_NameNative_Name
0afAfrikaansAfrikaans
1amAmharicአማርኛ
2arArabicالعربية
3asAssameseঅসমীয়া
4azAzerbaijaniAzərbaycan
............
130yuaYucatec MayaYucatec Maya
131yueCantonese (Traditional)粵語 (繁體)
132zh-HansChinese Simplified中文 (简体)
133zh-HantChinese Traditional繁體中文 (繁體)
134zuZuluIsi-Zulu
\n", 406 | "

135 rows × 3 columns

\n", 407 | "
" 408 | ], 409 | "text/plain": [ 410 | " Language_Code Language_Name Native_Name\n", 411 | "0 af Afrikaans Afrikaans\n", 412 | "1 am Amharic አማርኛ\n", 413 | "2 ar Arabic العربية\n", 414 | "3 as Assamese অসমীয়া\n", 415 | "4 az Azerbaijani Azərbaycan\n", 416 | ".. ... ... ...\n", 417 | "130 yua Yucatec Maya Yucatec Maya\n", 418 | "131 yue Cantonese (Traditional) 粵語 (繁體)\n", 419 | "132 zh-Hans Chinese Simplified 中文 (简体)\n", 420 | "133 zh-Hant Chinese Traditional 繁體中文 (繁體)\n", 421 | "134 zu Zulu Isi-Zulu\n", 422 | "\n", 423 | "[135 rows x 3 columns]" 424 | ] 425 | }, 426 | "execution_count": 9, 427 | "metadata": {}, 428 | "output_type": "execute_result" 429 | } 430 | ], 431 | "source": [ 432 | "lang_list = []\n", 433 | "\n", 434 | "if response.translation is not None:\n", 435 | " for key, value in response.translation.items():\n", 436 | " lang_list.append(\n", 437 | " {\n", 438 | " \"Language_Code\": key,\n", 439 | " \"Language_Name\": value.name,\n", 440 | " \"Native_Name\": value.native_name,\n", 441 | " }\n", 442 | " )\n", 443 | "\n", 444 | "df_languages = pd.DataFrame(lang_list)\n", 445 | "df_languages" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 10, 451 | "id": "9c65648e-77ea-4a6f-9955-5a49b304983d", 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [ 455 | "# Get dict from dataframe\n", 456 | "language_dict = df_languages.set_index(\"Language_Code\")[\"Language_Name\"].to_dict()\n", 457 | "# Full names list\n", 458 | "language_full_names = list(language_dict.values())\n", 459 | "# Sort\n", 460 | "language_full_names.sort()\n", 461 | "# Language codes list\n", 462 | "language_codes = list(language_dict.keys())\n", 463 | "# Sort\n", 464 | "language_codes.sort()\n", 465 | "# Creation of dict\n", 466 | "reverse_language_names = {v: k for k, v in language_dict.items()}" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 11, 472 | "id": "a9518185-eeb8-4d7d-83d9-ffb6d0d99d33", 473 | "metadata": {}, 474 
| "outputs": [], 475 | "source": [ 476 | "def get_language_code(language_name):\n", 477 | " \"\"\"\n", 478 | " Function to get language code by language name\n", 479 | " Input: language full name\n", 480 | " Output: language code name\n", 481 | " \"\"\"\n", 482 | " return reverse_language_names.get(language_name)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 12, 488 | "id": "6cf5031b-7474-461b-b668-aef100c4e4cd", 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "name": "stdout", 493 | "output_type": "stream", 494 | "text": [ 495 | "en\n" 496 | ] 497 | } 498 | ], 499 | "source": [ 500 | "print(get_language_code(\"English\"))" 501 | ] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "id": "276a5455", 506 | "metadata": {}, 507 | "source": [ 508 | "## Testing" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": 13, 514 | "id": "6a2c417a-5fba-4113-bbe9-ad33b93c97c6", 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "def azure_ai_translator(mytext, source_lang, target_lang):\n", 519 | " \"\"\"\n", 520 | " Translates text from one language to another using Azure AI Translator.\n", 521 | " \n", 522 | " Args:\n", 523 | " mytext (str): The text to be translated.\n", 524 | " source_lang (str): The full name of the source language.\n", 525 | " target_lang (str): The full name of the target language.\n", 526 | " \n", 527 | " Returns:\n", 528 | " str: The translated text, or None if an error occurs.\n", 529 | " \"\"\"\n", 530 | " try:\n", 531 | " credential = AzureKeyCredential(azure_ai_translator_key)\n", 532 | " \n", 533 | " text_translator = TextTranslationClient(\n", 534 | " credential=credential, region=azure_ai_translator_region)\n", 535 | " input_text_elements = [mytext]\n", 536 | "\n", 537 | " # Get language codes\n", 538 | " source_lang_code = get_language_code(source_lang)\n", 539 | " target_lang_code = [get_language_code(target_lang)]\n", 540 | " \n", 541 | " # Response\n", 542 | " 
response = text_translator.translate(body=input_text_elements,\n", 543 | " to_language=target_lang_code)\n", 544 | " translation = response[0] if response else None\n", 545 | "\n", 546 | " if translation:\n", 547 | " detected_language = translation.detected_language\n", 548 | " if detected_language:\n", 549 | " print(f\"Detected languages of the input text: {detected_language.language} with score = {detected_language.score}.\")\n", 550 | " for translated_text in translation.translations:\n", 551 | " print(f\"\\nText to translate to: '{translated_text.to}'\")\n", 552 | " return translated_text.text\n", 553 | "\n", 554 | " except HttpResponseError as exception:\n", 555 | " if exception.error is not None:\n", 556 | " print(f\"Error Code: {exception.error.code}\")\n", 557 | " print(f\"Message: {exception.error.message}\")" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 14, 563 | "id": "310647b9-d414-42e6-add7-d135e3949525", 564 | "metadata": {}, 565 | "outputs": [ 566 | { 567 | "name": "stdout", 568 | "output_type": "stream", 569 | "text": [ 570 | "Detected languages of the input text: en with score = 1.0.\n", 571 | "\n", 572 | "Text to translate to: 'fr'\n", 573 | "Azure est une plateforme de cloud computing gérée par Microsoft. Elle offre l’accès, la gestion et le développement d’applications et de services par le biais de centres de données mondiaux\n" 574 | ] 575 | } 576 | ], 577 | "source": [ 578 | "mytext = \"Azure is a cloud computing platform run by Microsoft. 
It offers access, management, and the development of applications and services through global data centers\"\n", 579 | "\n", 580 | "result = azure_ai_translator(mytext, \"English\", \"French\")\n", 581 | "print(result)" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 15, 587 | "id": "21f06c2c", 588 | "metadata": {}, 589 | "outputs": [ 590 | { 591 | "name": "stdout", 592 | "output_type": "stream", 593 | "text": [ 594 | "Detected languages of the input text: fr with score = 1.0.\n", 595 | "\n", 596 | "Text to translate to: 'it'\n", 597 | "Qual è il tempo di viaggio da Parigi a Versailles?\n" 598 | ] 599 | } 600 | ], 601 | "source": [ 602 | "mytext = \"Quel est le temps de trajet de Paris à Versailles ?\"\n", 603 | "\n", 604 | "result = azure_ai_translator(mytext, \"French\", \"Italian\")\n", 605 | "print(result)" 606 | ] 607 | }, 608 | { 609 | "cell_type": "markdown", 610 | "id": "2f2d34aa", 611 | "metadata": {}, 612 | "source": [ 613 | "## Webapp" 614 | ] 615 | }, 616 | { 617 | "cell_type": "markdown", 618 | "id": "8084d4d6", 619 | "metadata": {}, 620 | "source": [ 621 | "https://learn.microsoft.com/en-us/azure/ai-services/translator/language-support" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 18, 627 | "id": "6440cecc", 628 | "metadata": {}, 629 | "outputs": [ 630 | { 631 | "name": "stdout", 632 | "output_type": "stream", 633 | "text": [ 634 | "* Running on local URL: http://127.0.0.1:7863\n", 635 | "* Running on public URL: https://1e5738d4d8114a12b0.gradio.live\n", 636 | "\n", 637 | "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n" 638 | ] 639 | }, 640 | { 641 | "data": { 642 | "text/html": [ 643 | "
" 644 | ], 645 | "text/plain": [ 646 | "" 647 | ] 648 | }, 649 | "metadata": {}, 650 | "output_type": "display_data" 651 | }, 652 | { 653 | "data": { 654 | "text/plain": [] 655 | }, 656 | "execution_count": 18, 657 | "metadata": {}, 658 | "output_type": "execute_result" 659 | }, 660 | { 661 | "name": "stdout", 662 | "output_type": "stream", 663 | "text": [ 664 | "Detected languages of the input text: en with score = 1.0.\n", 665 | "\n", 666 | "Text to translate to: 'fr'\n", 667 | "Detected languages of the input text: es with score = 1.0.\n", 668 | "\n", 669 | "Text to translate to: 'it'\n", 670 | "Detected languages of the input text: fr with score = 1.0.\n", 671 | "\n", 672 | "Text to translate to: 'ar'\n" 673 | ] 674 | } 675 | ], 676 | "source": [ 677 | "image_url = \"https://github.com/retkowsky/azure-ai-translator/blob/main/logo.jpg?raw=true\"\n", 678 | "logo = \"
\".format(image_url)\n", 679 | "\n", 680 | "translator_webapp = gr.Interface(\n", 681 | " fn=azure_ai_translator,\n", 682 | " inputs=[\n", 683 | " gr.components.Textbox(label=\"Text to translate\"),\n", 684 | " gr.components.Dropdown(label=\"Source language\",\n", 685 | " choices=language_full_names),\n", 686 | " gr.components.Dropdown(label=\"Target language\",\n", 687 | " choices=language_full_names),\n", 688 | " ],\n", 689 | " outputs=gr.Text(label=\"Translated text\"),\n", 690 | " cache_examples=False,\n", 691 | " title=\"Azure AI Translator\",\n", 692 | " description=logo,\n", 693 | " #theme=\"JohnSmith9982/small_and_pretty\",\n", 694 | " examples=[\n", 695 | " [\n", 696 | " \"Hello. Welcome to this presentation of Azure AI Translator\",\n", 697 | " \"English\",\n", 698 | " \"French\",\n", 699 | " ],\n", 700 | " [\n", 701 | " \"Hola. Bienvenidos a esta presentación de Azure AI Translator\",\n", 702 | " \"Spanish\",\n", 703 | " \"Italian\",\n", 704 | " ],\n", 705 | " [\n", 706 | " \"Bonjour à tous.\",\n", 707 | " \"French\",\n", 708 | " \"Arabic\",\n", 709 | " ],\n", 710 | " ],\n", 711 | ")\n", 712 | "\n", 713 | "translator_webapp.launch(share=True)" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": null, 719 | "id": "18f9f9d2", 720 | "metadata": {}, 721 | "outputs": [], 722 | "source": [] 723 | }, 724 | { 725 | "cell_type": "code", 726 | "execution_count": null, 727 | "id": "7b1a5de3", 728 | "metadata": {}, 729 | "outputs": [], 730 | "source": [] 731 | } 732 | ], 733 | "metadata": { 734 | "kernelspec": { 735 | "display_name": "Python 3.10 - SDK v2", 736 | "language": "python", 737 | "name": "python310-sdkv2" 738 | }, 739 | "language_info": { 740 | "codemirror_mode": { 741 | "name": "ipython", 742 | "version": 3 743 | }, 744 | "file_extension": ".py", 745 | "mimetype": "text/x-python", 746 | "name": "python", 747 | "nbconvert_exporter": "python", 748 | "pygments_lexer": "ipython3", 749 | "version": "3.10.14" 750 | } 751 | }, 752 | 
"nbformat": 4, 753 | "nbformat_minor": 5 754 | } 755 | -------------------------------------------------------------------------------- /1 Azure AI Translator informations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a5c4560e", 6 | "metadata": {}, 7 | "source": [ 8 | "# Azure AI Translator informations\n", 9 | "https://learn.microsoft.com/en-us/azure/ai-services/translator/translator-overview\n", 10 | "\n", 11 | "" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "id": "350f6a1f-3dae-45f4-9218-b4ef292c8894", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "#%pip install azure-ai-translation-text" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "id": "91e73e5c-c51d-4646-9a34-47e2da1d1378", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "#%pip install azure-ai-translation-document==1.1.0b1" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "id": "d591b8aa", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import datetime\n", 42 | "import pandas as pd\n", 43 | "import sys\n", 44 | "\n", 45 | "from azure.ai.translation.text import TextTranslationClient\n", 46 | "from azure.core.credentials import AzureKeyCredential\n", 47 | "from azure.ai.translation.text.models import InputTextItem\n", 48 | "from azure.core.exceptions import HttpResponseError\n", 49 | "from dotenv import load_dotenv" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "id": "53c7c758", 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "'3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]'" 62 | ] 63 | }, 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "sys.version" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | 
"execution_count": 5, 76 | "id": "fb41d891", 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "Today is 05-Mar-2025 11:11:27\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "print(f\"Today is {datetime.datetime.today().strftime('%d-%b-%Y %H:%M:%S')}\")" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "id": "ce33a4d2", 94 | "metadata": {}, 95 | "source": [ 96 | "## Azure AI Translator credentials" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 6, 102 | "id": "d8306c39", 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "load_dotenv(\"azure.env\")\n", 107 | "\n", 108 | "azure_ai_translator_key = os.getenv(\"AZURE_AI_TRANSLATION_KEY\")\n", 109 | "azure_ai_translator_region = os.getenv(\"AZURE_AI_TRANSLATION_REGION\")" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 7, 115 | "id": "b56d3379", 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "credential = AzureKeyCredential(azure_ai_translator_key)\n", 120 | "\n", 121 | "text_translator = TextTranslationClient(\n", 122 | " credential=credential,\n", 123 | " region=azure_ai_translator_region\n", 124 | ")" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "id": "d13f1156", 130 | "metadata": {}, 131 | "source": [ 132 | "## Informations" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 8, 138 | "id": "e3655972", 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "Number of supported languages for translate operation: 135\n", 146 | "Number of supported languages for transliterate operation: 42\n", 147 | "Number of supported languages for dictionary operations: 50\n", 148 | "\n", 149 | "Translation Languages:\n", 150 | "1 af -- name: Afrikaans (Afrikaans)\n", 151 | "2 am -- name: Amharic (አማርኛ)\n", 152 | "3 ar -- name: Arabic (العربية)\n", 153 | 
"4 as -- name: Assamese (অসমীয়া)\n", 154 | "5 az -- name: Azerbaijani (Azərbaycan)\n", 155 | "6 ba -- name: Bashkir (Bashkir)\n", 156 | "7 bg -- name: Bulgarian (Български)\n", 157 | "8 bho -- name: Bhojpuri (भोजपुरी)\n", 158 | "9 bn -- name: Bangla (বাংলা)\n", 159 | "10 bo -- name: Tibetan (བོད་སྐད་)\n", 160 | "11 brx -- name: Bodo (बड़ो)\n", 161 | "12 bs -- name: Bosnian (Bosanski)\n", 162 | "13 ca -- name: Catalan (Català)\n", 163 | "14 cs -- name: Czech (Čeština)\n", 164 | "15 cy -- name: Welsh (Cymraeg)\n", 165 | "16 da -- name: Danish (Dansk)\n", 166 | "17 de -- name: German (Deutsch)\n", 167 | "18 doi -- name: Dogri (डोगरी)\n", 168 | "19 dsb -- name: Lower Sorbian (Dolnoserbšćina)\n", 169 | "20 dv -- name: Divehi (ދިވެހިބަސް)\n", 170 | "21 el -- name: Greek (Ελληνικά)\n", 171 | "22 en -- name: English (English)\n", 172 | "23 es -- name: Spanish (Español)\n", 173 | "24 et -- name: Estonian (Eesti)\n", 174 | "25 eu -- name: Basque (Euskara)\n", 175 | "26 fa -- name: Persian (فارسی)\n", 176 | "27 fi -- name: Finnish (Suomi)\n", 177 | "28 fil -- name: Filipino (Filipino)\n", 178 | "29 fj -- name: Fijian (Na Vosa Vakaviti)\n", 179 | "30 fo -- name: Faroese (Føroyskt)\n", 180 | "31 fr -- name: French (Français)\n", 181 | "32 fr-CA -- name: French (Canada) (Français (Canada))\n", 182 | "33 ga -- name: Irish (Gaeilge)\n", 183 | "34 gl -- name: Galician (Galego)\n", 184 | "35 gom -- name: Konkani (कोंकणी)\n", 185 | "36 gu -- name: Gujarati (ગુજરાતી)\n", 186 | "37 ha -- name: Hausa (Hausa)\n", 187 | "38 he -- name: Hebrew (עברית)\n", 188 | "39 hi -- name: Hindi (हिन्दी)\n", 189 | "40 hne -- name: Chhattisgarhi (छत्तीसगढ़ी)\n", 190 | "41 hr -- name: Croatian (Hrvatski)\n", 191 | "42 hsb -- name: Upper Sorbian (Hornjoserbšćina)\n", 192 | "43 ht -- name: Haitian Creole (Haitian Creole)\n", 193 | "44 hu -- name: Hungarian (Magyar)\n", 194 | "45 hy -- name: Armenian (Հայերեն)\n", 195 | "46 id -- name: Indonesian (Indonesia)\n", 196 | "47 ig -- name: Igbo (Ásụ̀sụ́ 
Ìgbò)\n", 197 | "48 ikt -- name: Inuinnaqtun (Inuinnaqtun)\n", 198 | "49 is -- name: Icelandic (Íslenska)\n", 199 | "50 it -- name: Italian (Italiano)\n", 200 | "51 iu -- name: Inuktitut (ᐃᓄᒃᑎᑐᑦ)\n", 201 | "52 iu-Latn -- name: Inuktitut (Latin) (Inuktitut (Latin))\n", 202 | "53 ja -- name: Japanese (日本語)\n", 203 | "54 ka -- name: Georgian (ქართული)\n", 204 | "55 kk -- name: Kazakh (Қазақ Тілі)\n", 205 | "56 km -- name: Khmer (ខ្មែរ)\n", 206 | "57 kmr -- name: Kurdish (Northern) (Kurdî (Bakur))\n", 207 | "58 kn -- name: Kannada (ಕನ್ನಡ)\n", 208 | "59 ko -- name: Korean (한국어)\n", 209 | "60 ks -- name: Kashmiri (کٲشُر)\n", 210 | "61 ku -- name: Kurdish (Central) (Kurdî (Navîn))\n", 211 | "62 ky -- name: Kyrgyz (Кыргызча)\n", 212 | "63 ln -- name: Lingala (Lingála)\n", 213 | "64 lo -- name: Lao (ລາວ)\n", 214 | "65 lt -- name: Lithuanian (Lietuvių)\n", 215 | "66 lug -- name: Ganda (Ganda)\n", 216 | "67 lv -- name: Latvian (Latviešu)\n", 217 | "68 lzh -- name: Chinese (Literary) (中文 (文言文))\n", 218 | "69 mai -- name: Maithili (मैथिली)\n", 219 | "70 mg -- name: Malagasy (Malagasy)\n", 220 | "71 mi -- name: Māori (Te Reo Māori)\n", 221 | "72 mk -- name: Macedonian (Македонски)\n", 222 | "73 ml -- name: Malayalam (മലയാളം)\n", 223 | "74 mn-Cyrl -- name: Mongolian (Cyrillic) (Монгол)\n", 224 | "75 mn-Mong -- name: Mongolian (Traditional) (ᠮᠣᠩᠭᠣᠯ ᠬᠡᠯᠡ)\n", 225 | "76 mni -- name: Manipuri (ꯃꯩꯇꯩꯂꯣꯟ)\n", 226 | "77 mr -- name: Marathi (मराठी)\n", 227 | "78 ms -- name: Malay (Melayu)\n", 228 | "79 mt -- name: Maltese (Malti)\n", 229 | "80 mww -- name: Hmong Daw (Hmong Daw)\n", 230 | "81 my -- name: Myanmar (Burmese) (မြန်မာ)\n", 231 | "82 nb -- name: Norwegian (Norsk Bokmål)\n", 232 | "83 ne -- name: Nepali (नेपाली)\n", 233 | "84 nl -- name: Dutch (Nederlands)\n", 234 | "85 nso -- name: Sesotho sa Leboa (Sesotho sa Leboa)\n", 235 | "86 nya -- name: Nyanja (Nyanja)\n", 236 | "87 or -- name: Odia (ଓଡ଼ିଆ)\n", 237 | "88 otq -- name: Querétaro Otomi (Hñähñu)\n", 238 | "89 pa -- name: 
Punjabi (ਪੰਜਾਬੀ)\n", 239 | "90 pl -- name: Polish (Polski)\n", 240 | "91 prs -- name: Dari (دری)\n", 241 | "92 ps -- name: Pashto (پښتو)\n", 242 | "93 pt -- name: Portuguese (Brazil) (Português (Brasil))\n", 243 | "94 pt-PT -- name: Portuguese (Portugal) (Português (Portugal))\n", 244 | "95 ro -- name: Romanian (Română)\n", 245 | "96 ru -- name: Russian (Русский)\n", 246 | "97 run -- name: Rundi (Rundi)\n", 247 | "98 rw -- name: Kinyarwanda (Kinyarwanda)\n", 248 | "99 sd -- name: Sindhi (سنڌي)\n", 249 | "100 si -- name: Sinhala (සිංහල)\n", 250 | "101 sk -- name: Slovak (Slovenčina)\n", 251 | "102 sl -- name: Slovenian (Slovenščina)\n", 252 | "103 sm -- name: Samoan (Gagana Sāmoa)\n", 253 | "104 sn -- name: Shona (chiShona)\n", 254 | "105 so -- name: Somali (Soomaali)\n", 255 | "106 sq -- name: Albanian (Shqip)\n", 256 | "107 sr-Cyrl -- name: Serbian (Cyrillic) (Српски (ћирилица))\n", 257 | "108 sr-Latn -- name: Serbian (Latin) (Srpski (latinica))\n", 258 | "109 st -- name: Sesotho (Sesotho)\n", 259 | "110 sv -- name: Swedish (Svenska)\n", 260 | "111 sw -- name: Swahili (Kiswahili)\n", 261 | "112 ta -- name: Tamil (தமிழ்)\n", 262 | "113 te -- name: Telugu (తెలుగు)\n", 263 | "114 th -- name: Thai (ไทย)\n", 264 | "115 ti -- name: Tigrinya (ትግር)\n", 265 | "116 tk -- name: Turkmen (Türkmen Dili)\n", 266 | "117 tlh-Latn -- name: Klingon (Latin) (Klingon (Latin))\n", 267 | "118 tlh-Piqd -- name: Klingon (pIqaD) (Klingon (pIqaD))\n", 268 | "119 tn -- name: Setswana (Setswana)\n", 269 | "120 to -- name: Tongan (Lea Fakatonga)\n", 270 | "121 tr -- name: Turkish (Türkçe)\n", 271 | "122 tt -- name: Tatar (Татар)\n", 272 | "123 ty -- name: Tahitian (Reo Tahiti)\n", 273 | "124 ug -- name: Uyghur (ئۇيغۇرچە)\n", 274 | "125 uk -- name: Ukrainian (Українська)\n", 275 | "126 ur -- name: Urdu (اردو)\n", 276 | "127 uz -- name: Uzbek (Latin) (O‘Zbek)\n", 277 | "128 vi -- name: Vietnamese (Tiếng Việt)\n", 278 | "129 xh -- name: Xhosa (isiXhosa)\n", 279 | "130 yo -- name: Yoruba (Èdè 
Yorùbá)\n", 280 | "131 yua -- name: Yucatec Maya (Yucatec Maya)\n", 281 | "132 yue -- name: Cantonese (Traditional) (粵語 (繁體))\n", 282 | "133 zh-Hans -- name: Chinese Simplified (中文 (简体))\n", 283 | "134 zh-Hant -- name: Chinese Traditional (繁體中文 (繁體))\n", 284 | "135 zu -- name: Zulu (Isi-Zulu)\n", 285 | "\n", 286 | "Transliteration Languages:\n", 287 | "1 ar -- name: Arabic, supported script count: 2\n", 288 | "2 as -- name: Assamese, supported script count: 2\n", 289 | "3 be -- name: Belarusian, supported script count: 2\n", 290 | "4 bg -- name: Bulgarian, supported script count: 2\n", 291 | "5 bn -- name: Bangla, supported script count: 2\n", 292 | "6 brx -- name: Bodo, supported script count: 2\n", 293 | "7 el -- name: Greek, supported script count: 2\n", 294 | "8 fa -- name: Persian, supported script count: 2\n", 295 | "9 gom -- name: Konkani, supported script count: 2\n", 296 | "10 gu -- name: Gujarati, supported script count: 2\n", 297 | "11 he -- name: Hebrew, supported script count: 2\n", 298 | "12 hi -- name: Hindi, supported script count: 2\n", 299 | "13 ja -- name: Japanese, supported script count: 2\n", 300 | "14 kk -- name: Kazakh, supported script count: 2\n", 301 | "15 kn -- name: Kannada, supported script count: 2\n", 302 | "16 ko -- name: Korean, supported script count: 2\n", 303 | "17 ks -- name: Kashmiri, supported script count: 2\n", 304 | "18 ky -- name: Kyrgyz, supported script count: 2\n", 305 | "19 mai -- name: Maithili, supported script count: 2\n", 306 | "20 mk -- name: Macedonian, supported script count: 2\n", 307 | "21 ml -- name: Malayalam, supported script count: 2\n", 308 | "22 mn-Cyrl -- name: Mongolian (Cyrillic), supported script count: 1\n", 309 | "23 mni -- name: Manipuri, supported script count: 2\n", 310 | "24 mr -- name: Marathi, supported script count: 2\n", 311 | "25 ne -- name: Nepali, supported script count: 2\n", 312 | "26 or -- name: Odia, supported script count: 2\n", 313 | "27 pa -- name: Punjabi, supported script 
count: 2\n", 314 | "28 ru -- name: Russian, supported script count: 2\n", 315 | "29 sa -- name: Sanskrit, supported script count: 2\n", 316 | "30 sd -- name: Sindhi, supported script count: 2\n", 317 | "31 si -- name: Sinhala, supported script count: 2\n", 318 | "32 sr-Cyrl -- name: Serbian (Cyrillic), supported script count: 1\n", 319 | "33 sr-Latn -- name: Serbian (Latin), supported script count: 1\n", 320 | "34 ta -- name: Tamil, supported script count: 2\n", 321 | "35 te -- name: Telugu, supported script count: 2\n", 322 | "36 tg -- name: Tajik, supported script count: 2\n", 323 | "37 th -- name: Thai, supported script count: 1\n", 324 | "38 tt -- name: Tatar, supported script count: 2\n", 325 | "39 uk -- name: Ukrainian, supported script count: 2\n", 326 | "40 ur -- name: Urdu, supported script count: 2\n", 327 | "41 zh-Hans -- name: Chinese Simplified, supported script count: 2\n", 328 | "42 zh-Hant -- name: Chinese Traditional, supported script count: 2\n", 329 | "\n", 330 | "Dictionary Languages:\n", 331 | "1 af -- name: Afrikaans, supported target languages count: 1\n", 332 | "2 ar -- name: Arabic, supported target languages count: 1\n", 333 | "3 bg -- name: Bulgarian, supported target languages count: 1\n", 334 | "4 bn -- name: Bangla, supported target languages count: 1\n", 335 | "5 bs -- name: Bosnian, supported target languages count: 1\n", 336 | "6 ca -- name: Catalan, supported target languages count: 1\n", 337 | "7 cs -- name: Czech, supported target languages count: 1\n", 338 | "8 cy -- name: Welsh, supported target languages count: 1\n", 339 | "9 da -- name: Danish, supported target languages count: 1\n", 340 | "10 de -- name: German, supported target languages count: 1\n", 341 | "11 el -- name: Greek, supported target languages count: 1\n", 342 | "12 en -- name: English, supported target languages count: 49\n", 343 | "13 es -- name: Spanish, supported target languages count: 1\n", 344 | "14 et -- name: Estonian, supported target languages count: 
1\n", 345 | "15 fa -- name: Persian, supported target languages count: 1\n", 346 | "16 fi -- name: Finnish, supported target languages count: 1\n", 347 | "17 fr -- name: French, supported target languages count: 1\n", 348 | "18 he -- name: Hebrew, supported target languages count: 1\n", 349 | "19 hi -- name: Hindi, supported target languages count: 1\n", 350 | "20 hr -- name: Croatian, supported target languages count: 1\n", 351 | "21 hu -- name: Hungarian, supported target languages count: 1\n", 352 | "22 id -- name: Indonesian, supported target languages count: 1\n", 353 | "23 is -- name: Icelandic, supported target languages count: 1\n", 354 | "24 it -- name: Italian, supported target languages count: 1\n", 355 | "25 ja -- name: Japanese, supported target languages count: 1\n", 356 | "26 ko -- name: Korean, supported target languages count: 1\n", 357 | "27 lt -- name: Lithuanian, supported target languages count: 1\n", 358 | "28 lv -- name: Latvian, supported target languages count: 1\n", 359 | "29 ms -- name: Malay, supported target languages count: 1\n", 360 | "30 mt -- name: Maltese, supported target languages count: 1\n", 361 | "31 mww -- name: Hmong Daw, supported target languages count: 1\n", 362 | "32 nb -- name: Norwegian, supported target languages count: 1\n", 363 | "33 nl -- name: Dutch, supported target languages count: 1\n", 364 | "34 pl -- name: Polish, supported target languages count: 1\n", 365 | "35 pt -- name: Portuguese (Brazil), supported target languages count: 1\n", 366 | "36 ro -- name: Romanian, supported target languages count: 1\n", 367 | "37 ru -- name: Russian, supported target languages count: 1\n", 368 | "38 sk -- name: Slovak, supported target languages count: 1\n", 369 | "39 sl -- name: Slovenian, supported target languages count: 1\n", 370 | "40 sr-Latn -- name: Serbian (Latin), supported target languages count: 1\n", 371 | "41 sv -- name: Swedish, supported target languages count: 1\n", 372 | "42 sw -- name: Swahili, supported 
target languages count: 1\n", 373 | "43 ta -- name: Tamil, supported target languages count: 1\n", 374 | "44 th -- name: Thai, supported target languages count: 1\n", 375 | "45 tlh-Latn -- name: Klingon (Latin), supported target languages count: 1\n", 376 | "46 tr -- name: Turkish, supported target languages count: 1\n", 377 | "47 uk -- name: Ukrainian, supported target languages count: 1\n", 378 | "48 ur -- name: Urdu, supported target languages count: 1\n", 379 | "49 vi -- name: Vietnamese, supported target languages count: 1\n", 380 | "50 zh-Hans -- name: Chinese Simplified, supported target languages count: 1\n" 381 | ] 382 | } 383 | ], 384 | "source": [ 385 | "try:\n", 386 | " response = text_translator.get_supported_languages()\n", 387 | "\n", 388 | " print(\n", 389 | " f\"Number of supported languages for translate operation: {len(response.translation) if response.translation is not None else 0}\"\n", 390 | " )\n", 391 | " print(\n", 392 | " f\"Number of supported languages for transliterate operation: {len(response.transliteration) if response.transliteration is not None else 0}\"\n", 393 | " )\n", 394 | " print(\n", 395 | " f\"Number of supported languages for dictionary operations: {len(response.dictionary) if response.dictionary is not None else 0}\"\n", 396 | " )\n", 397 | "\n", 398 | " print()\n", 399 | "\n", 400 | " if response.translation is not None:\n", 401 | " print(\"Translation Languages:\")\n", 402 | " i = 1\n", 403 | " for key, value in response.translation.items():\n", 404 | " print(f\"{i} {key} -- name: {value.name} ({value.native_name})\")\n", 405 | " i += 1\n", 406 | " print()\n", 407 | "\n", 408 | " if response.transliteration is not None:\n", 409 | " print(\"Transliteration Languages:\")\n", 410 | " i = 1\n", 411 | " for key, value in response.transliteration.items():\n", 412 | " print(\n", 413 | " f\"{i} {key} -- name: {value.name}, supported script count: {len(value.scripts)}\")\n", 414 | " i += 1\n", 415 | " print()\n", 416 | "\n", 
417 | " if response.dictionary is not None:\n", 418 | " print(\"Dictionary Languages:\")\n", 419 | " i = 1\n", 420 | " for key, value in response.dictionary.items():\n", 421 | " print(\n", 422 | " f\"{i} {key} -- name: {value.name}, supported target languages count: {len(value.translations)}\")\n", 423 | " i += 1\n", 424 | "\n", 425 | "except HttpResponseError as exception:\n", 426 | " if exception.error is not None:\n", 427 | " print(f\"Error Code: {exception.error.code}\")\n", 428 | " print(f\"Message: {exception.error.message}\")\n", 429 | " raise" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 9, 435 | "id": "d996b8f4", 436 | "metadata": {}, 437 | "outputs": [ 438 | { 439 | "name": "stdout", 440 | "output_type": "stream", 441 | "text": [ 442 | "Number of supported languages for translate operation: 135\n", 443 | "Number of supported languages for transliterate operation: 42\n", 444 | "Number of supported languages for dictionary operations: 50\n", 445 | "\n", 446 | "Translation Languages:\n", 447 | "1 af -- name: Afrikaans (Afrikaans)\n", 448 | "2 am -- name: Amharique (አማርኛ)\n", 449 | "3 ar -- name: Arabe (العربية)\n", 450 | "4 as -- name: Assamais (অসমীয়া)\n", 451 | "5 az -- name: Azerbaïdjanais (Azərbaycan)\n", 452 | "6 ba -- name: Bachkir (Bashkir)\n", 453 | "7 bg -- name: Bulgare (Български)\n", 454 | "8 bho -- name: Bhojpuri (भोजपुरी)\n", 455 | "9 bn -- name: Bengali (বাংলা)\n", 456 | "10 bo -- name: Tibétain (བོད་སྐད་)\n", 457 | "11 brx -- name: Bodo (बड़ो)\n", 458 | "12 bs -- name: Bosniaque (Bosanski)\n", 459 | "13 ca -- name: Catalan (Català)\n", 460 | "14 cs -- name: Tchèque (Čeština)\n", 461 | "15 cy -- name: Gallois (Cymraeg)\n", 462 | "16 da -- name: Danois (Dansk)\n", 463 | "17 de -- name: Allemand (Deutsch)\n", 464 | "18 doi -- name: Dogri (डोगरी)\n", 465 | "19 dsb -- name: Bas-Sorabe (Dolnoserbšćina)\n", 466 | "20 dv -- name: Maldivien (ދިވެހިބަސް)\n", 467 | "21 el -- name: Grec (Ελληνικά)\n", 468 | "22 en -- 
name: Anglais (English)\n", 469 | "23 es -- name: Espagnol (Español)\n", 470 | "24 et -- name: Estonien (Eesti)\n", 471 | "25 eu -- name: Basque (Euskara)\n", 472 | "26 fa -- name: Persan (فارسی)\n", 473 | "27 fi -- name: Finnois (Suomi)\n", 474 | "28 fil -- name: Filipino (Filipino)\n", 475 | "29 fj -- name: Fidjien (Na Vosa Vakaviti)\n", 476 | "30 fo -- name: Féroïen (Føroyskt)\n", 477 | "31 fr -- name: Français (Français)\n", 478 | "32 fr-CA -- name: Français (Canada) (Français (Canada))\n", 479 | "33 ga -- name: Irlandais (Gaeilge)\n", 480 | "34 gl -- name: Galicien (Galego)\n", 481 | "35 gom -- name: Konkani (कोंकणी)\n", 482 | "36 gu -- name: Goudjarati (ગુજરાતી)\n", 483 | "37 ha -- name: Haoussa (Hausa)\n", 484 | "38 he -- name: Hébreu (עברית)\n", 485 | "39 hi -- name: Hindi (हिन्दी)\n", 486 | "40 hne -- name: Chhattisgarhi (छत्तीसगढ़ी)\n", 487 | "41 hr -- name: Croate (Hrvatski)\n", 488 | "42 hsb -- name: Haut-Sorabe (Hornjoserbšćina)\n", 489 | "43 ht -- name: Créole Haïtien (Haitian Creole)\n", 490 | "44 hu -- name: Hongrois (Magyar)\n", 491 | "45 hy -- name: Arménien (Հայերեն)\n", 492 | "46 id -- name: Indonésien (Indonesia)\n", 493 | "47 ig -- name: Igbo (Ásụ̀sụ́ Ìgbò)\n", 494 | "48 ikt -- name: Inuinnaqtun (Inuinnaqtun)\n", 495 | "49 is -- name: Islandais (Íslenska)\n", 496 | "50 it -- name: Italien (Italiano)\n", 497 | "51 iu -- name: Inuktitut (ᐃᓄᒃᑎᑐᑦ)\n", 498 | "52 iu-Latn -- name: Inuktitut (Latin) (Inuktitut (Latin))\n", 499 | "53 ja -- name: Japonais (日本語)\n", 500 | "54 ka -- name: Géorgien (ქართული)\n", 501 | "55 kk -- name: Kazakh (Қазақ Тілі)\n", 502 | "56 km -- name: Khmer (ខ្មែរ)\n", 503 | "57 kmr -- name: Kurde (septentrional) (Kurdî (Bakur))\n", 504 | "58 kn -- name: Kannada (ಕನ್ನಡ)\n", 505 | "59 ko -- name: Coréen (한국어)\n", 506 | "60 ks -- name: Kashmiri (کٲشُر)\n", 507 | "61 ku -- name: Kurde (central) (Kurdî (Navîn))\n", 508 | "62 ky -- name: Kirghize (Кыргызча)\n", 509 | "63 ln -- name: Lingala (Lingála)\n", 510 | "64 lo -- name: Lao 
(ລາວ)\n", 511 | "65 lt -- name: Lituanien (Lietuvių)\n", 512 | "66 lug -- name: Ganda (Ganda)\n", 513 | "67 lv -- name: Letton (Latviešu)\n", 514 | "68 lzh -- name: Chinese (Literary) (中文 (文言文))\n", 515 | "69 mai -- name: Maïthili (मैथिली)\n", 516 | "70 mg -- name: Malgache (Malagasy)\n", 517 | "71 mi -- name: Maori (Te Reo Māori)\n", 518 | "72 mk -- name: Macédonien (Македонски)\n", 519 | "73 ml -- name: Malayalam (മലയാളം)\n", 520 | "74 mn-Cyrl -- name: Mongolian (Cyrillic) (Монгол)\n", 521 | "75 mn-Mong -- name: Mongolian (Traditional) (ᠮᠣᠩᠭᠣᠯ ᠬᠡᠯᠡ)\n", 522 | "76 mni -- name: Manipuri (ꯃꯩꯇꯩꯂꯣꯟ)\n", 523 | "77 mr -- name: Marathi (मराठी)\n", 524 | "78 ms -- name: Malais (Melayu)\n", 525 | "79 mt -- name: Maltais (Malti)\n", 526 | "80 mww -- name: Hmong (Hmong Daw)\n", 527 | "81 my -- name: Birman (မြန်မာ)\n", 528 | "82 nb -- name: Norvégien Bokmål (Norsk Bokmål)\n", 529 | "83 ne -- name: Népalais (नेपाली)\n", 530 | "84 nl -- name: Néerlandais (Nederlands)\n", 531 | "85 nso -- name: Sesotho sa Leboa (Sesotho sa Leboa)\n", 532 | "86 nya -- name: Nyanja (Nyanja)\n", 533 | "87 or -- name: Odia (ଓଡ଼ିଆ)\n", 534 | "88 otq -- name: Otomi De Querétaro (Hñähñu)\n", 535 | "89 pa -- name: Pendjabi (ਪੰਜਾਬੀ)\n", 536 | "90 pl -- name: Polonais (Polski)\n", 537 | "91 prs -- name: Dari (دری)\n", 538 | "92 ps -- name: Pachto (پښتو)\n", 539 | "93 pt -- name: Portugais (Brésil) (Português (Brasil))\n", 540 | "94 pt-PT -- name: Portugais (Portugal) (Português (Portugal))\n", 541 | "95 ro -- name: Roumain (Română)\n", 542 | "96 ru -- name: Russe (Русский)\n", 543 | "97 run -- name: Rundi (Rundi)\n", 544 | "98 rw -- name: Kinyarwanda (Kinyarwanda)\n", 545 | "99 sd -- name: Sindhi (سنڌي)\n", 546 | "100 si -- name: Cingalais (සිංහල)\n", 547 | "101 sk -- name: Slovaque (Slovenčina)\n", 548 | "102 sl -- name: Slovène (Slovenščina)\n", 549 | "103 sm -- name: Samoan (Gagana Sāmoa)\n", 550 | "104 sn -- name: Shona (chiShona)\n", 551 | "105 so -- name: Somali (Soomaali)\n", 552 | "106 sq -- 
name: Albanais (Shqip)\n", 553 | "107 sr-Cyrl -- name: Serbe (Cyrillique) (Српски (ћирилица))\n", 554 | "108 sr-Latn -- name: Serbe (Latin) (Srpski (latinica))\n", 555 | "109 st -- name: Sesotho (Sesotho)\n", 556 | "110 sv -- name: Suédois (Svenska)\n", 557 | "111 sw -- name: Swahili (Kiswahili)\n", 558 | "112 ta -- name: Tamoul (தமிழ்)\n", 559 | "113 te -- name: Télougou (తెలుగు)\n", 560 | "114 th -- name: Thaï (ไทย)\n", 561 | "115 ti -- name: Tigrigna (ትግር)\n", 562 | "116 tk -- name: Turkmène (Türkmen Dili)\n", 563 | "117 tlh-Latn -- name: Klingon (Latin) (Klingon (Latin))\n", 564 | "118 tlh-Piqd -- name: Klingon (pIqaD) (Klingon (pIqaD))\n", 565 | "119 tn -- name: Setswana (Setswana)\n", 566 | "120 to -- name: Tongien (Lea Fakatonga)\n", 567 | "121 tr -- name: Turc (Türkçe)\n", 568 | "122 tt -- name: Tatar (Татар)\n", 569 | "123 ty -- name: Tahitien (Reo Tahiti)\n", 570 | "124 ug -- name: Ouïghour (ئۇيغۇرچە)\n", 571 | "125 uk -- name: Ukrainien (Українська)\n", 572 | "126 ur -- name: Ourdou (اردو)\n", 573 | "127 uz -- name: Ouzbek (O‘Zbek)\n", 574 | "128 vi -- name: Vietnamien (Tiếng Việt)\n", 575 | "129 xh -- name: Xhosa (isiXhosa)\n", 576 | "130 yo -- name: Yoruba (Èdè Yorùbá)\n", 577 | "131 yua -- name: Maya Yucatèque (Yucatec Maya)\n", 578 | "132 yue -- name: Cantonais (Traditionnel) (粵語 (繁體))\n", 579 | "133 zh-Hans -- name: Chinois (Simplifié) (中文 (简体))\n", 580 | "134 zh-Hant -- name: Chinois (Traditionnel) (繁體中文 (繁體))\n", 581 | "135 zu -- name: Zoulou (Isi-Zulu)\n", 582 | "Transliteration Languages:\n", 583 | "0 ar -- name: Arabe, supported script count: 2\n", 584 | "1 as -- name: Assamais, supported script count: 2\n", 585 | "2 be -- name: Biélorusse, supported script count: 2\n", 586 | "3 bg -- name: Bulgare, supported script count: 2\n", 587 | "4 bn -- name: Bengali, supported script count: 2\n", 588 | "5 brx -- name: Bodo, supported script count: 2\n", 589 | "6 el -- name: Grec, supported script count: 2\n", 590 | "7 fa -- name: Persan, supported 
script count: 2\n", 591 | "8 gom -- name: Konkani, supported script count: 2\n", 592 | "9 gu -- name: Goudjarati, supported script count: 2\n", 593 | "10 he -- name: Hébreu, supported script count: 2\n", 594 | "11 hi -- name: Hindi, supported script count: 2\n", 595 | "12 ja -- name: Japonais, supported script count: 2\n", 596 | "13 kk -- name: Kazakh, supported script count: 2\n", 597 | "14 kn -- name: Kannada, supported script count: 2\n", 598 | "15 ko -- name: Coréen, supported script count: 2\n", 599 | "16 ks -- name: Kashmiri, supported script count: 2\n", 600 | "17 ky -- name: Kirghize, supported script count: 2\n", 601 | "18 mai -- name: Maïthili, supported script count: 2\n", 602 | "19 mk -- name: Macédonien, supported script count: 2\n", 603 | "20 ml -- name: Malayalam, supported script count: 2\n", 604 | "21 mn-Cyrl -- name: Mongolian (Cyrillic), supported script count: 1\n", 605 | "22 mni -- name: Manipuri, supported script count: 2\n", 606 | "23 mr -- name: Marathi, supported script count: 2\n", 607 | "24 ne -- name: Népalais, supported script count: 2\n", 608 | "25 or -- name: Odia, supported script count: 2\n", 609 | "26 pa -- name: Pendjabi, supported script count: 2\n", 610 | "27 ru -- name: Russe, supported script count: 2\n", 611 | "28 sa -- name: Sanskrit, supported script count: 2\n", 612 | "29 sd -- name: Sindhi, supported script count: 2\n", 613 | "30 si -- name: Cingalais, supported script count: 2\n", 614 | "31 sr-Cyrl -- name: Serbe (Cyrillique), supported script count: 1\n", 615 | "32 sr-Latn -- name: Serbe (Latin), supported script count: 1\n", 616 | "33 ta -- name: Tamoul, supported script count: 2\n", 617 | "34 te -- name: Télougou, supported script count: 2\n", 618 | "35 tg -- name: Tadjik, supported script count: 2\n", 619 | "36 th -- name: Thaï, supported script count: 1\n", 620 | "37 tt -- name: Tatar, supported script count: 2\n", 621 | "38 uk -- name: Ukrainien, supported script count: 2\n", 622 | "39 ur -- name: Ourdou, supported 
script count: 2\n", 623 | "40 zh-Hans -- name: Chinois (Simplifié), supported script count: 2\n", 624 | "41 zh-Hant -- name: Chinois (Traditionnel), supported script count: 2\n", 625 | "Dictionary Languages:\n", 626 | "0 af -- name: Afrikaans, supported target languages count: 1\n", 627 | "1 ar -- name: Arabe, supported target languages count: 1\n", 628 | "2 bg -- name: Bulgare, supported target languages count: 1\n", 629 | "3 bn -- name: Bengali, supported target languages count: 1\n", 630 | "4 bs -- name: Bosniaque, supported target languages count: 1\n", 631 | "5 ca -- name: Catalan, supported target languages count: 1\n", 632 | "6 cs -- name: Tchèque, supported target languages count: 1\n", 633 | "7 cy -- name: Gallois, supported target languages count: 1\n", 634 | "8 da -- name: Danois, supported target languages count: 1\n", 635 | "9 de -- name: Allemand, supported target languages count: 1\n", 636 | "10 el -- name: Grec, supported target languages count: 1\n", 637 | "11 en -- name: Anglais, supported target languages count: 49\n", 638 | "12 es -- name: Espagnol, supported target languages count: 1\n", 639 | "13 et -- name: Estonien, supported target languages count: 1\n", 640 | "14 fa -- name: Persan, supported target languages count: 1\n", 641 | "15 fi -- name: Finnois, supported target languages count: 1\n", 642 | "16 fr -- name: Français, supported target languages count: 1\n", 643 | "17 he -- name: Hébreu, supported target languages count: 1\n", 644 | "18 hi -- name: Hindi, supported target languages count: 1\n", 645 | "19 hr -- name: Croate, supported target languages count: 1\n", 646 | "20 hu -- name: Hongrois, supported target languages count: 1\n", 647 | "21 id -- name: Indonésien, supported target languages count: 1\n", 648 | "22 is -- name: Islandais, supported target languages count: 1\n", 649 | "23 it -- name: Italien, supported target languages count: 1\n", 650 | "24 ja -- name: Japonais, supported target languages count: 1\n", 651 | "25 ko -- 
name: Coréen, supported target languages count: 1\n", 652 | "26 lt -- name: Lituanien, supported target languages count: 1\n", 653 | "27 lv -- name: Letton, supported target languages count: 1\n", 654 | "28 ms -- name: Malais, supported target languages count: 1\n", 655 | "29 mt -- name: Maltais, supported target languages count: 1\n", 656 | "30 mww -- name: Hmong, supported target languages count: 1\n", 657 | "31 nb -- name: Norvégien Bokmål, supported target languages count: 1\n", 658 | "32 nl -- name: Néerlandais, supported target languages count: 1\n", 659 | "33 pl -- name: Polonais, supported target languages count: 1\n", 660 | "34 pt -- name: Portugais (Brésil), supported target languages count: 1\n", 661 | "35 ro -- name: Roumain, supported target languages count: 1\n", 662 | "36 ru -- name: Russe, supported target languages count: 1\n", 663 | "37 sk -- name: Slovaque, supported target languages count: 1\n", 664 | "38 sl -- name: Slovène, supported target languages count: 1\n", 665 | "39 sr-Latn -- name: Serbe (Latin), supported target languages count: 1\n", 666 | "40 sv -- name: Suédois, supported target languages count: 1\n", 667 | "41 sw -- name: Swahili, supported target languages count: 1\n", 668 | "42 ta -- name: Tamoul, supported target languages count: 1\n", 669 | "43 th -- name: Thaï, supported target languages count: 1\n", 670 | "44 tlh-Latn -- name: Klingon (Latin), supported target languages count: 1\n", 671 | "45 tr -- name: Turc, supported target languages count: 1\n", 672 | "46 uk -- name: Ukrainien, supported target languages count: 1\n", 673 | "47 ur -- name: Ourdou, supported target languages count: 1\n", 674 | "48 vi -- name: Vietnamien, supported target languages count: 1\n", 675 | "49 zh-Hans -- name: Chinois (Simplifié), supported target languages count: 1\n" 676 | ] 677 | } 678 | ], 679 | "source": [ 680 | "try:\n", 681 | " accept_language = \"fr\"\n", 682 | " response = 
text_translator.get_supported_languages(accept_language=accept_language)\n", 683 | "\n", 684 | " print(\n", 685 | " f\"Number of supported languages for translate operation: {len(response.translation) if response.translation is not None else 0}\"\n", 686 | " )\n", 687 | " print(\n", 688 | " f\"Number of supported languages for transliterate operation: {len(response.transliteration) if response.transliteration is not None else 0}\"\n", 689 | " )\n", 690 | " print(\n", 691 | " f\"Number of supported languages for dictionary operations: {len(response.dictionary) if response.dictionary is not None else 0}\"\n", 692 | " )\n", 693 | "\n", 694 | " print()\n", 695 | " if response.translation is not None:\n", 696 | " print(\"Translation Languages:\")\n", 697 | " i = 1\n", 698 | " for key, value in response.translation.items():\n", 699 | " print(f\"{i} {key} -- name: {value.name} ({value.native_name})\")\n", 700 | " i += 1\n", 701 | "\n", 702 | " if response.transliteration is not None:\n", 703 | " print(\"Transliteration Languages:\")\n", 704 | " i = 0\n", 705 | " for key, value in response.transliteration.items():\n", 706 | " print(\n", 707 | " f\"{i} {key} -- name: {value.name}, supported script count: {len(value.scripts)}\")\n", 708 | " i += 1\n", 709 | "\n", 710 | " if response.dictionary is not None:\n", 711 | " i = 0\n", 712 | " print(\"Dictionary Languages:\")\n", 713 | " for key, value in response.dictionary.items():\n", 714 | " print(\n", 715 | " f\"{i} {key} -- name: {value.name}, supported target languages count: {len(value.translations)}\")\n", 716 | " i += 1\n", 717 | "\n", 718 | "except HttpResponseError as exception:\n", 719 | " if exception.error is not None:\n", 720 | " print(f\"Error Code: {exception.error.code}\")\n", 721 | " print(f\"Message: {exception.error.message}\")\n", 722 | " raise" 723 | ] 724 | }, 725 | { 726 | "cell_type": "markdown", 727 | "id": "b0cb26f5", 728 | "metadata": {}, 729 | "source": [ 730 | "## Supported translation languages as a pandas DataFrame" 731 | ] 732 | }, 733 | { 734 | 
"cell_type": "code", 735 | "execution_count": 10, 736 | "id": "7fe70ae9", 737 | "metadata": {}, 738 | "outputs": [], 739 | "source": [ 740 | "data = []\n", 741 | "\n", 742 | "if response.translation is not None:\n", 743 | " for key, value in response.translation.items():\n", 744 | " data.append({'Language_Code': key,\n", 745 | " 'Language_Name': value.name,\n", 746 | " 'Native_Name': value.native_name})\n", 747 | "\n", 748 | "df = pd.DataFrame(data)" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": 11, 754 | "id": "4b6a1a3a", 755 | "metadata": {}, 756 | "outputs": [ 757 | { 758 | "data": { 759 | "text/html": [ 760 | "
\n", 761 | "\n", 774 | "\n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | "
Language_CodeLanguage_NameNative_Name
0afAfrikaansAfrikaans
1amAmhariqueአማርኛ
2arArabeالعربية
3asAssamaisঅসমীয়া
4azAzerbaïdjanaisAzərbaycan
............
130yuaMaya YucatèqueYucatec Maya
131yueCantonais (Traditionnel)粵語 (繁體)
132zh-HansChinois (Simplifié)中文 (简体)
133zh-HantChinois (Traditionnel)繁體中文 (繁體)
134zuZoulouIsi-Zulu
\n", 852 | "

135 rows × 3 columns

\n", 853 | "
" 854 | ], 855 | "text/plain": [ 856 | " Language_Code Language_Name Native_Name\n", 857 | "0 af Afrikaans Afrikaans\n", 858 | "1 am Amharique አማርኛ\n", 859 | "2 ar Arabe العربية\n", 860 | "3 as Assamais অসমীয়া\n", 861 | "4 az Azerbaïdjanais Azərbaycan\n", 862 | ".. ... ... ...\n", 863 | "130 yua Maya Yucatèque Yucatec Maya\n", 864 | "131 yue Cantonais (Traditionnel) 粵語 (繁體)\n", 865 | "132 zh-Hans Chinois (Simplifié) 中文 (简体)\n", 866 | "133 zh-Hant Chinois (Traditionnel) 繁體中文 (繁體)\n", 867 | "134 zu Zoulou Isi-Zulu\n", 868 | "\n", 869 | "[135 rows x 3 columns]" 870 | ] 871 | }, 872 | "execution_count": 11, 873 | "metadata": {}, 874 | "output_type": "execute_result" 875 | } 876 | ], 877 | "source": [ 878 | "df" 879 | ] 880 | }, 881 | { 882 | "cell_type": "code", 883 | "execution_count": 12, 884 | "id": "fdba71ae", 885 | "metadata": {}, 886 | "outputs": [ 887 | { 888 | "data": { 889 | "text/plain": [ 890 | "(135, 3)" 891 | ] 892 | }, 893 | "execution_count": 12, 894 | "metadata": {}, 895 | "output_type": "execute_result" 896 | } 897 | ], 898 | "source": [ 899 | "df.shape" 900 | ] 901 | }, 902 | { 903 | "cell_type": "code", 904 | "execution_count": 13, 905 | "id": "079ba312", 906 | "metadata": {}, 907 | "outputs": [], 908 | "source": [ 909 | "df.to_excel(\"languages.xlsx\")" 910 | ] 911 | }, 912 | { 913 | "cell_type": "code", 914 | "execution_count": 14, 915 | "id": "dce3f151", 916 | "metadata": {}, 917 | "outputs": [ 918 | { 919 | "name": "stdout", 920 | "output_type": "stream", 921 | "text": [ 922 | "-rwxrwxrwx 1 root root 11K Mar 5 11:11 languages.xlsx\n" 923 | ] 924 | } 925 | ], 926 | "source": [ 927 | "!ls languages.xlsx -lh" 928 | ] 929 | }, 930 | { 931 | "cell_type": "code", 932 | "execution_count": 15, 933 | "id": "d85b4dfa", 934 | "metadata": {}, 935 | "outputs": [], 936 | "source": [ 937 | "def get_language_name(langcode):\n", 938 | " # Get language name from its code\n", 939 | " langname = df.loc[df['Language_Code'] == langcode, 'Language_Name'].values[0]\n", 
940 | " print(langcode, \"=>\", langname)\n", 941 | " \n", 942 | " return langname" 943 | ] 944 | }, 945 | { 946 | "cell_type": "code", 947 | "execution_count": 16, 948 | "id": "46aafdd5", 949 | "metadata": {}, 950 | "outputs": [ 951 | { 952 | "name": "stdout", 953 | "output_type": "stream", 954 | "text": [ 955 | "fr => Français\n" 956 | ] 957 | }, 958 | { 959 | "data": { 960 | "text/plain": [ 961 | "'Français'" 962 | ] 963 | }, 964 | "execution_count": 16, 965 | "metadata": {}, 966 | "output_type": "execute_result" 967 | } 968 | ], 969 | "source": [ 970 | "get_language_name(\"fr\")" 971 | ] 972 | }, 973 | { 974 | "cell_type": "code", 975 | "execution_count": 17, 976 | "id": "33486bb3", 977 | "metadata": {}, 978 | "outputs": [], 979 | "source": [ 980 | "def get_language_code(langname):\n", 981 | " # get language code from its name\n", 982 | " langcode = df.loc[df['Language_Name'] == langname, 'Language_Code'].values[0]\n", 983 | " print(langname, \"=>\", langcode)\n", 984 | " \n", 985 | " return langcode" 986 | ] 987 | }, 988 | { 989 | "cell_type": "code", 990 | "execution_count": 18, 991 | "id": "2af2a2ae", 992 | "metadata": {}, 993 | "outputs": [ 994 | { 995 | "name": "stdout", 996 | "output_type": "stream", 997 | "text": [ 998 | "Français => fr\n" 999 | ] 1000 | }, 1001 | { 1002 | "data": { 1003 | "text/plain": [ 1004 | "'fr'" 1005 | ] 1006 | }, 1007 | "execution_count": 18, 1008 | "metadata": {}, 1009 | "output_type": "execute_result" 1010 | } 1011 | ], 1012 | "source": [ 1013 | "get_language_code(\"Français\")" 1014 | ] 1015 | }, 1016 | { 1017 | "cell_type": "code", 1018 | "execution_count": 19, 1019 | "id": "24d24d44", 1020 | "metadata": {}, 1021 | "outputs": [], 1022 | "source": [ 1023 | "df.to_excel(\"output.xlsx\")" 1024 | ] 1025 | }, 1026 | { 1027 | "cell_type": "code", 1028 | "execution_count": 20, 1029 | "id": "427c29b0", 1030 | "metadata": {}, 1031 | "outputs": [ 1032 | { 1033 | "data": { 1034 | "text/html": [ 1035 | "output.xlsx
" 1036 | ], 1037 | "text/plain": [ 1038 | "/mnt/batch/tasks/shared/LS_root/mounts/clusters/seretkow8/code/Users/seretkow/Azure AI Translator/output.xlsx" 1039 | ] 1040 | }, 1041 | "execution_count": 20, 1042 | "metadata": {}, 1043 | "output_type": "execute_result" 1044 | } 1045 | ], 1046 | "source": [ 1047 | "from IPython.display import FileLink\n", 1048 | "\n", 1049 | "LINK = FileLink(path='output.xlsx')\n", 1050 | "LINK" 1051 | ] 1052 | }, 1053 | { 1054 | "cell_type": "code", 1055 | "execution_count": null, 1056 | "id": "7698058d", 1057 | "metadata": {}, 1058 | "outputs": [], 1059 | "source": [] 1060 | } 1061 | ], 1062 | "metadata": { 1063 | "kernelspec": { 1064 | "display_name": "Python 3.10 - SDK v2", 1065 | "language": "python", 1066 | "name": "python310-sdkv2" 1067 | }, 1068 | "language_info": { 1069 | "codemirror_mode": { 1070 | "name": "ipython", 1071 | "version": 3 1072 | }, 1073 | "file_extension": ".py", 1074 | "mimetype": "text/x-python", 1075 | "name": "python", 1076 | "nbconvert_exporter": "python", 1077 | "pygments_lexer": "ipython3", 1078 | "version": "3.10.14" 1079 | } 1080 | }, 1081 | "nbformat": 4, 1082 | "nbformat_minor": 5 1083 | } 1084 | --------------------------------------------------------------------------------