├── .gitignore
├── other_versions
├── FixCommonArabicMistakes-Basic.b
└── FixCommonArabicMistakes-Standalone.py
├── README.md
└── FixCommonArabicMistakes-Python.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.swp
3 |
--------------------------------------------------------------------------------
/other_versions/FixCommonArabicMistakes-Basic.b:
--------------------------------------------------------------------------------
1 | REM ***** BASIC *****
2 |
Sub FixCommonArabicMistakes
    ' Runs a fixed, ordered list of regex find/replace rules over the
    ' current document. aPatterns(i) is replaced by aSubstitutes(i); the
    ' two arrays are parallel and must stay the same length.
    Dim oDocument As Object
    Dim oDescriptor As Object
    Dim aPatterns As Variant
    Dim aSubstitutes As Variant
    Dim iRule As Integer

    oDocument = thisComponent

    ' Search expressions, applied in this order.
    aPatterns = Array("(\p{script=arabic}\W?)([ ]?;)", "(\p{script=arabic}\W?)([ ]?,)", "\([ ]+", "[ ]+\)", "^[\ ]*$", "^[\ ]*", "[\ ]*$", "[ ]+", " :", " ؛", " ،", " \.", " !", " ؟", " و ", "^و ", "ـ")
    ' Replacement text for the pattern at the same index.
    aSubstitutes = Array("$1؛", "$1،", "(", ")", "", "", "", " ", ":", "؛", "،", ".", "!", "؟", " و", "و", "")

    ' One descriptor is reused for every rule; only its search and
    ' replace strings change between iterations.
    oDescriptor = oDocument.createReplaceDescriptor
    oDescriptor.SearchCaseSensitive = true
    oDescriptor.SearchRegularExpression = true

    For iRule = 0 To uBound(aPatterns)
        oDescriptor.setSearchString(aPatterns(iRule))
        oDescriptor.setReplaceString(aSubstitutes(iRule))
        oDocument.ReplaceAll(oDescriptor)
    Next iRule
End Sub
18 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | FCAM script (Fix Common Arabic Mistakes).
2 | =========================================
3 |
4 | Open/LibreOffice macros (Basic and Python) that use regex to fix common Arabic mistakes. For now, both the Basic and Python versions do the same thing, but I will go with Python from now on.
5 |
6 | What's it doing?
7 | -------------------
8 | - Removes any empty lines.
9 | - Removes Arabic Kashida (Tatweel "ـ").
10 | - Removes whitespaces after single WAW letter (و).
11 | - Removes whitespaces before Arabic punctuation (؛،.؟:!).
12 | - Replaces any sequence of question or exclamation marks with a single one (e.g. "!!!" and "??").
13 | - Removes whitespace just inside brackets "()" and "[]".
14 | - Removes any sequence of whitespaces and whitespaces at end of lines.
15 | - Replaces Latin comma (decimal separator) "," after Arabic words to Arabic comma "،".
16 | - Replaces Latin semicolon ";" after Arabic words to Arabic semicolon "؛".
17 |
18 | Basic version.
19 | -------------------
20 | Just copy the code into built-in macros editor.
21 |
22 |
23 | Python version.
24 | -------------------
25 | By default Open/LibreOffice supports Basic natively, and if you want to use python macros, you have to install an extra package.
26 |
27 | For Ubuntu:
28 | ```
29 | sudo apt-get install libreoffice-script-provider-python
30 | ```
31 |
32 | ### Install
33 | Copy the macro to scripts path.
34 |
35 | Global path to make it available for all users:
36 | ```
37 | /usr/lib/libreoffice/share/Scripts/python/
38 | ```
39 |
40 | **OR** under your home:
41 | ```
42 | ~/.config/libreoffice/4/user/Scripts/python/
43 | ```
44 |
45 | ### Run
46 | From the menu:
47 | ```
48 | Tools ▸ Macros ▸ Run Macro ▸ LibreOffice Macros ▸ FixCommonArabicMistakes-Python
49 | ```
50 |
51 |
52 |
53 |
54 |
55 | Standalone version.
56 | -------------------
57 | I made a standalone version of the script too. It is useful for bulk files and the like; it may not be that cool, but it may help someone :-)
58 |
59 | This script does not work as a macro; instead it starts Open/LibreOffice on a specific local port and connects to it, then opens the file passed to it and fixes common mistakes. It will not save the file automatically; it waits for you to save as a final action.
60 |
61 | ### Run:
62 | ```
63 | ./FixCommonArabicMistakes-Standalone.py file_name.odt
64 | ```
65 |
--------------------------------------------------------------------------------
/other_versions/FixCommonArabicMistakes-Standalone.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Fix Common Arabic Mistakes - Standalone version.
5 | =========================================
6 |
7 | DESCRIPTION:
8 | Standalone version of the Python Open/LibreOffice macro that uses regex to fix common Arabic mistakes.
9 | This version is useful for bulk files and the like; it may not be that cool, but it may help someone :-)
10 |
11 | This script does not work as a macro; instead it connects to Open/LibreOffice, opens the file passed to it, and fixes common mistakes.
12 | It will not save the file automatically; it waits for you to save as a final action.
13 |
14 | USING:
15 | ./FixCommonArabicMistakes-Standalone.py file_name.odt
16 |
17 | MORE:
18 | https://github.com/AAbouZaid/fix-common-arabic-mistakes
19 |
20 | BY:
21 | Ahmed M. AbouZaid (http://tech.aabouzaid.com/) - Under GPL v2.0 or later.
22 | """
23 | import re
24 | import os
25 | import sys
26 | import uno
27 | import time
28 | import unohelper
29 |
# Standalone driver: start a listening Open/LibreOffice instance, connect to
# it over UNO, open the file given on the command line and apply the regex
# fixes to it. The document is left open and unsaved so the user can review
# the result and save it manually.

if len(sys.argv) == 1:
    print("Please write odt file name/path.")
    sys.exit(1)

file_path = sys.argv[1]
if not os.path.isfile(file_path):
    # Fail before launching the office suite for a path that cannot be opened.
    print("File not found: %s" % file_path)
    sys.exit(1)

# The listener and the client MUST use the same socket description.
# Previously soffice listened on port 2015 while the resolver connected to
# port 2002, so the script could never reach the instance it had started.
UNO_SOCKET = "socket,host=localhost,port=2015"

os.popen('soffice --accept="%s;urp;StarOffice.ServiceManager" --norestore --nologo &' % UNO_SOCKET)
# Grace period for soffice to start listening before we try to connect.
time.sleep(3)

localcontext = uno.getComponentContext()
resolver = localcontext.getServiceManager().createInstanceWithContext(
    "com.sun.star.bridge.UnoUrlResolver", localcontext)
context = resolver.resolve("uno:%s;urp;StarOffice.ComponentContext" % UNO_SOCKET)
desktop = context.getServiceManager().createInstanceWithContext(
    "com.sun.star.frame.Desktop", context)
url = unohelper.systemPathToFileUrl(os.path.abspath(file_path))

currentDoc = desktop.loadComponentFromURL(url, "_blank", 0, ())
if currentDoc is None:
    print("Could not open file: %s" % file_path)
    sys.exit(1)

findAndReplace = currentDoc.createReplaceDescriptor()
findAndReplace.SearchCaseSensitive = True
findAndReplace.SearchRegularExpression = True

# (search regex, replacement) pairs, applied strictly in this order.
# The order matters (e.g. runs of spaces are collapsed before the
# "space + punctuation" rules run), so an ordered sequence is used rather
# than relying on dict iteration order. Patterns are raw strings because
# they contain regex escapes (\p, \W, \(, \.) that are not valid Python
# string escapes.
replaceList = (
    (r"(\p{script=arabic}\W?)([ ]?;)", "$1؛"),
    (r"(\p{script=arabic}\W?)([ ]?,)", "$1،"),
    (r"\([ ]+", "("),
    (r"[ ]+\)", ")"),
    (r"\[[ ]+", "["),
    (r"[ ]+\]", "]"),
    (r"^[ ]+$", ""),
    (r"^[ ]+", ""),
    (r"[ ]+$", ""),
    (r"[ ]+", " "),
    (r" :", ":"),
    (r" ؛", "؛"),
    (r" ،", "،"),
    (r" \.", "."),
    (r" !", "!"),
    (r"!+", "!"),
    (r" ؟", "؟"),
    (r"؟+", "؟"),
    # Lookahead instead of [^0-9]: the character after the space must not
    # be consumed, otherwise it is deleted together with the space.
    (r"^و (?![0-9])", "و"),
    # Keep the leading space so waw is not glued to the previous word.
    (r" و (?![0-9])", " و"),
    (r"ـ", ""),
)

for searchPattern, replacement in replaceList:
    findAndReplace.SearchString = searchPattern
    findAndReplace.ReplaceString = replacement
    currentDoc.replaceAll(findAndReplace)
76 |
--------------------------------------------------------------------------------
/FixCommonArabicMistakes-Python.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | FCAM script (Fix Common Arabic Mistakes).
5 | =========================================
6 |
7 | DESCRIPTION:
8 | Python Open/LibreOffice macro that uses regex to fix common Arabic mistakes.
9 |
10 | What's it doing?
11 | - Removes any empty lines.
12 | - Removes Arabic Kashida (Tatweel "ـ").
13 | - Removes whitespaces after single WAW letter (و).
14 | - Removes whitespaces before Arabic punctuation (؛،.؟:!).
15 | - Removes any sequence of question and exclamation marks and replace them with one only (e.g. "!!!" and "??").
16 | - Removes whitespaces in inside brackets "()" and "[]".
17 | - Removes any sequence of whitespaces and whitespaces at end of lines.
18 | - Replaces Latin comma (decimal separator) "," after Arabic words to Arabic comma "،".
19 | - Replaces Latin semicolon ";" after Arabic words to Arabic semicolon "؛".
20 |
21 | USING:
22 | 1. Install.
23 | By default Open/LibreOffice supports Basic natively, and if you want to use python, you have to install an extra package.
24 |
25 | For Ubuntu:
26 | sudo apt-get install libreoffice-script-provider-python
27 |
28 | Then, copy the macro to scripts path.
29 |
30 | - Global to make it available for all users:
31 | /usr/lib/libreoffice/share/Scripts/python/
32 |
33 | - OR under your home:
34 | ~/.config/libreoffice/4/user/Scripts/python/
35 |
36 | 2. Run.
37 | From the menu:
38 | Tools ▸ Macros ▸ Run Macro ▸ LibreOffice Macros ▸ FixCommonArabicMistakes-Python
39 |
40 | VERSION:
41 | v0.1 - November 2015.
42 |
43 | BY:
44 | Ahmed M. AbouZaid (http://tech.aabouzaid.com/) - Under GPL v2.0 or later.
45 | """
46 | import uno
47 |
def FixCommonArabicMistakes():
    """Apply the regex clean-up rules to the current document.

    The document is taken from ``XSCRIPTCONTEXT``, which the LibreOffice
    Python script provider injects into the macro's namespace. Each rule is
    a case-sensitive regular-expression "replace all" executed on the whole
    document, in the order the rules are listed.

    Returns:
        None. The document is modified in place.
    """
    # (search regex, replacement) pairs, applied strictly in this order.
    # The order matters (e.g. runs of spaces are collapsed before the
    # "space + punctuation" rules run), so an ordered sequence is used
    # rather than relying on dict iteration order. Patterns are raw strings
    # because they contain regex escapes (\p, \W, \(, \.) that are not valid
    # Python string escapes.
    replaceList = (
        (r"(\p{script=arabic}\W?)([ ]?;)", "$1؛"),
        (r"(\p{script=arabic}\W?)([ ]?,)", "$1،"),
        (r"\([ ]+", "("),
        (r"[ ]+\)", ")"),
        (r"\[[ ]+", "["),
        (r"[ ]+\]", "]"),
        (r"^[ ]+$", ""),
        (r"^[ ]+", ""),
        (r"[ ]+$", ""),
        (r"[ ]+", " "),
        (r"” ", "”"),
        (r" “", "“"),
        (r" :", ":"),
        (r" ؛", "؛"),
        (r" ،", "،"),
        (r" \.", "."),
        (r" !", "!"),
        (r"!+", "!"),
        (r" ؟", "؟"),
        (r"؟+", "؟"),
        # Negative lookahead: a single waw followed by a number is left alone.
        (r"^و (?![0-9])", "و"),
        (r" و (?![0-9])", " و"),
        (r"ـ", ""),
    )

    currentDoc = XSCRIPTCONTEXT.getDocument()
    # One descriptor is reused for every rule; only the search and replace
    # strings change between iterations.
    findAndReplace = currentDoc.createReplaceDescriptor()
    findAndReplace.SearchCaseSensitive = True
    findAndReplace.SearchRegularExpression = True
    for searchPattern, replacement in replaceList:
        findAndReplace.SearchString = searchPattern
        findAndReplace.ReplaceString = replacement
        currentDoc.replaceAll(findAndReplace)
84 |
--------------------------------------------------------------------------------