├── .gitignore ├── lib ├── arabic-letter-connector │ ├── version.rb │ ├── string.rb │ └── logic.rb └── arabic-letter-connector.rb ├── arabic-letter-connector.gemspec ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | -------------------------------------------------------------------------------- /lib/arabic-letter-connector/version.rb: -------------------------------------------------------------------------------- 1 | module ArabicLetterConnector 2 | VERSION = "0.1.2" 3 | end 4 | -------------------------------------------------------------------------------- /lib/arabic-letter-connector/string.rb: -------------------------------------------------------------------------------- 1 | class String 2 | def connect_arabic_letters 3 | ArabicLetterConnector.transform(self) 4 | end 5 | end 6 | -------------------------------------------------------------------------------- /lib/arabic-letter-connector.rb: -------------------------------------------------------------------------------- 1 | require 'arabic-letter-connector/version' 2 | require 'arabic-letter-connector/logic' 3 | require 'arabic-letter-connector/string' 4 | -------------------------------------------------------------------------------- /arabic-letter-connector.gemspec: -------------------------------------------------------------------------------- 1 | $:.unshift(File.join(File.dirname(__FILE__), 'lib')) 2 | 3 | require 'arabic-letter-connector/version' 4 | 5 | Gem::Specification.new do |s| 6 | 7 | s.name = 'arabic-letter-connector' 8 | s.version = ArabicLetterConnector::VERSION 9 | s.date = '2013-05-29' 10 | s.summary = 'Arabic Letter Connector' 11 | s.description = 'A tool to replace generic disconnected Arabic letters with their connected counterparts.' 
12 | s.authors = ["Sinan Taifour", "Ahmed Nasser"] 13 | s.email = 'sinan@taifour.com' 14 | s.homepage = 'http://github.com/staii/arabic-letter-connector' 15 | 16 | s.files = `git ls-files`.split("\n") 17 | s.require_paths = ['lib'] 18 | 19 | end 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2013-2018 Sinan Taifour 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Arabic Letter Connector 2 | ======================= 3 | 4 | Motivation 5 | ---------- 6 | 7 | Arabic is not always well-supported in all libraries. 
There are two problems that often occur when 8 | attempting to present Arabic text: 9 | 10 | 1. The letters appear in reverse order (often because there is no right-to-left support). 11 | 2. The letters appear disconnected. 12 | 13 | This gem deals with the second problem. If you are facing this problem, it normally means your 14 | string uses the _generic_ form of every Arabic letter (that is, without any attributed form, 15 | for example a _Qaf_ not a _Qaf at the beginning of a word_), and the library 16 | you are using to present this string doesn't do anything about it. 17 | 18 | What this gem does is replace each such _generic_ character with a character _with form_. 19 | 20 | Acknowledgment 21 | -------------- 22 | 23 | This gem is a refactored version of `Arabic-Prawn` by Dynamix Solutions (Ahmed Nasser). 24 | 25 | Installation 26 | ------------ 27 | 28 | Simply run: 29 | 30 | gem install arabic-letter-connector 31 | 32 | Then require it with: 33 | 34 | require 'arabic-letter-connector' 35 | 36 | Usage 37 | ----- 38 | 39 | The gem provides an `ArabicLetterConnector.transform(string)` method, and also monkey-patches `String` 40 | to include a `connect_arabic_letters` method. 41 | 42 | Below is an example. In the browser, it might appear that this library is doing nothing (since the browser 43 | does the work of converting the characters from their generic form to their correct contextual form). Try 44 | it in IRB to get a sense of what is happening. 45 | 46 | x = "مرحبا يا العالم" 47 | x[0].unpack("C*") # [217, 133] 48 | y = x.connect_arabic_letters # "ﻣﺮﺣﺒﺎ ﻳﺎ ﺍﻟﻌﺎﻟﻢ" 49 | y[0].unpack("C*") # [239, 187, 163] 50 | 51 | This gem is particularly useful if you are using `prawn` to generate PDF files. 
# Replaces generic Arabic letters (U+0621..U+0653) with the positional
# presentation forms (isolated / initial / medial / final) they take inside a
# word, for renderers that do not shape Arabic text themselves.
module ArabicLetterConnector

  # Lazily built lookup table: generic character => CharacterInfo.
  @@charinfos = nil

  # Shaping data for a single generic Arabic character.
  class CharacterInfo

    attr_accessor :common, :formatted

    # Hash of { character preceding this one => ligature replacing both }.
    attr_reader :conditionals

    # common       - the generic character (one-character String).
    # isolated, final, initial, medial - the presentation form for each position.
    # connects     - true if the character joins to the character after it.
    # diacritic    - true if the character is a diacritic mark.
    # conditionals - see #conditionals.
    def initialize(common, isolated, final, initial, medial, connects, diacritic, conditionals)
      @common = common
      @formatted = {
        :isolated => isolated,
        :final => final,
        :initial => initial,
        :medial => medial,
      }
      @connects = connects
      @diacritic = diacritic
      @conditionals = conditionals
    end

    # True if this character joins to the character that follows it.
    def connects?
      @connects
    end

    # True if this character is a diacritic mark.
    def diacritic?
      @diacritic
    end
  end

  # Determine the form of the current character (:isolated, :initial, :medial,
  # or :final), given the two characters before it and the two after it. In
  # Arabic, all characters can connect with a previous character, but not all
  # letters can connect with the next character (this is determined by
  # CharacterInfo#connects?).
  #
  # A diacritic directly before or after the current character is skipped in
  # favour of the character one step further out; only one diacritic per side
  # is skipped.
  def self.determine_form(previous_previous_char, previous_char, next_char, next_next_char)
    charinfos = self.charinfos
    next_char = next_next_char if charinfos[next_char] && charinfos[next_char].diacritic?
    previous_char = previous_previous_char if charinfos[previous_char] && charinfos[previous_char].diacritic?
    if charinfos[previous_char] && charinfos[next_char]
      # If the current character does not connect, its medial form maps to its
      # final form, and its initial form maps to its isolated form.
      charinfos[previous_char].connects? ? :medial : :initial
    elsif charinfos[previous_char] # The next character is not an arabic character.
      charinfos[previous_char].connects? ? :final : :isolated
    elsif charinfos[next_char] # The previous character is not an arabic character.
      :initial # If the current character does not connect, its initial form maps to its isolated form.
    else # Neither of the surrounding characters are arabic characters.
      :isolated
    end
  end

  # Returns a new String in which every generic Arabic character of +str+ is
  # replaced by its positional presentation form. Characters that are not in
  # the table are copied unchanged. A Lam immediately followed by an Alef
  # variant is merged into the single Lam-Alef ligature glyph. Finally every
  # run of digits (as matched by \d+) is reversed.
  def self.transform(str)
    table = charinfos
    chars = str.chars
    res = "".dup

    chars.each_with_index do |current_char, index|
      info = table[current_char]
      unless info
        res << current_char
        next
      end

      # Guard the look-behind so negative indices never wrap to the end.
      previous_char = index >= 1 ? chars[index - 1] : nil
      previous_previous_char = index >= 2 ? chars[index - 2] : nil

      ligature = info.conditionals[previous_char]
      if ligature
        # The previous character already emitted exactly one glyph. If that
        # glyph is the joined (medial/final) shape of the previous letter, the
        # ligature must join too, so pick the entry keyed by that letter's
        # final form (the connected ligature) when the table has one.
        previous_info = table[previous_char]
        if previous_info
          joined = [previous_info.formatted[:medial], previous_info.formatted[:final]]
          if joined.include?(res[-1])
            ligature = info.conditionals[previous_info.formatted[:final]] || ligature
          end
        end
        res[-1] = ligature
      else
        form = determine_form(previous_previous_char, previous_char, chars[index + 1], chars[index + 2])
        res << info.formatted[form]
      end
    end

    res.gsub(/\d+/, &:reverse)
  end

  # NOTE: the helpers below are internal, but a bare `private` does not affect
  # methods defined with `def self.`, so they have always been publicly
  # callable and remain so.

  # Returns the lookup table (generic character => CharacterInfo), building it
  # on first use. Arguments to #add are hexadecimal code points in the order
  # common, isolated, final, initial, medial.
  def self.charinfos
    return @@charinfos unless @@charinfos.nil?
    @@charinfos = {}
    add("0627", "fe8d", "fe8e", "fe8d", "fe8e", false, false, {
      '0644' => 'fefb', # Lam-Alef, isolated
      'fede' => 'fefc'  # Lam-Alef, final (connected)
    }) # Alef
    add("0628", "fe8f", "fe90", "fe91", "fe92", true)  # Ba2
    add("062a", "fe95", "fe96", "fe97", "fe98", true)  # Ta2
    add("062b", "fe99", "fe9a", "fe9b", "fe9c", true)  # Tha2
    add("062c", "fe9d", "fe9e", "fe9f", "fea0", true)  # Jeem
    add("062d", "fea1", "fea2", "fea3", "fea4", true)  # 7a2
    add("062e", "fea5", "fea6", "fea7", "fea8", true)  # 7'a2
    add("062f", "fea9", "feaa", "fea9", "feaa", false) # Dal
    add("0630", "feab", "feac", "feab", "feac", false) # Thal
    add("0631", "fead", "feae", "fead", "feae", false) # Ra2
    add("0632", "feaf", "feb0", "feaf", "feb0", false) # Zain
    add("0633", "feb1", "feb2", "feb3", "feb4", true)  # Seen
    add("0634", "feb5", "feb6", "feb7", "feb8", true)  # Sheen
    add("0635", "feb9", "feba", "febb", "febc", true)  # 9ad
    add("0636", "febd", "febe", "febf", "fec0", true)  # 9'ad
    add("0637", "fec1", "fec2", "fec3", "fec4", true)  # 6a2
    add("0638", "fec5", "fec6", "fec7", "fec8", true)  # 6'a2
    add("0639", "fec9", "feca", "fecb", "fecc", true)  # 3ain
    add("063a", "fecd", "fece", "fecf", "fed0", true)  # 3'ain
    add("0641", "fed1", "fed2", "fed3", "fed4", true)  # Fa2
    add("0642", "fed5", "fed6", "fed7", "fed8", true)  # Qaf
    add("0643", "fed9", "feda", "fedb", "fedc", true)  # Kaf
    add("0644", "fedd", "fede", "fedf", "fee0", true)  # Lam
    add("0645", "fee1", "fee2", "fee3", "fee4", true)  # Meem
    add("0646", "fee5", "fee6", "fee7", "fee8", true)  # Noon
    add("0647", "fee9", "feea", "feeb", "feec", true)  # Ha2
    add("0648", "feed", "feee", "feed", "feee", false) # Waw
    add("064a", "fef1", "fef2", "fef3", "fef4", true)  # Ya2
    add("0621", "fe80", "fe80", "fe80", "fe80", false) # Hamza
    add("0622", "fe81", "fe82", "fe81", "fe82", false, false, {
      '0644' => 'fef5', # Lam-Alef Madda, isolated
      'fede' => 'fef6'  # Lam-Alef Madda, final (connected)
    }) # Alef Madda
    add("0623", "fe83", "fe84", "fe83", "fe84", false, false, {
      '0644' => 'fef7', # Lam-Alef Hamza Above, isolated (U+FEF7; U+FEFA is the Hamza Below final form)
      'fede' => 'fef8'  # Lam-Alef Hamza Above, final (connected)
    }) # Alef Hamza Above
    add("0624", "fe85", "fe86", "fe85", "fe86", false) # Waw Hamza
    add("0625", "fe87", "fe88", "fe87", "fe88", false, false, {
      '0644' => 'fef9', # Lam-Alef Hamza Below, isolated
      'fede' => 'fefa'  # Lam-Alef Hamza Below, final (connected)
    }) # Alef Hamza Below
    add("0626", "fe89", "fe8a", "fe8b", "fe8c", true)  # Ya2 Hamza
    add("0629", "fe93", "fe94", "fe93", "fe94", false) # Ta2 Marbu6a
    add("0640", "0640", "0640", "0640", "0640", true)  # Tatweel
    add("0649", "feef", "fef0", "feef", "fef0", false) # Alef Layyina
    add("0651", "fe7c", "fe7c", "fe7c", "fe7d", false, true) # Shadda
    add("0652", "fe7e", "fe7e", "fe7e", "fe7f", false, true) # Sukun
    add("064e", "fe76", "fe76", "fe76", "fe77", false, true) # Fatha
    add("0650", "fe7a", "fe7a", "fe7a", "fe7b", false, true) # Kasra
    add("064f", "fe78", "fe78", "fe78", "fe79", false, true) # Damma
    add("0653", "0653", "0653", "0653", "0653", false, true) # Madda
    add("064b", "fe70", "fe70", "fe70", "fe71", false, true) # Fathatan (isolated is U+FE70; U+FE79 is Damma medial)
    add("064d", "fe74", "fe74", "fe74", "fe74", false, true) # Kasratan
    add("064c", "fe72", "fe72", "fe72", "fe72", false, true) # Dammatan
    @@charinfos
  end

  # Registers one character in the lookup table. Every code point argument is
  # a hexadecimal String (e.g. "0627") and is converted to a one-character
  # UTF-8 String; +conditionals+ maps hex code point => hex code point and is
  # converted the same way. The table must already exist (it is created by
  # .charinfos).
  def self.add(common, isolated, final, initial, medial, connects, diacritic = false, conditionals={})
    charinfo = CharacterInfo.new(
      [common.hex].pack("U"),
      [isolated.hex].pack("U"),
      [final.hex].pack("U"),
      [initial.hex].pack("U"),
      [medial.hex].pack("U"),
      connects,
      diacritic,
      conditionals.map { |k, v| [[k.hex].pack("U"), [v.hex].pack("U")] }.to_h
    )
    @@charinfos[charinfo.common] = charinfo
  end

end