├── TODO ├── .gitignore ├── Rakefile ├── lib ├── virastar │ └── version.rb └── virastar.rb ├── Gemfile ├── spec ├── spec_helper.rb └── virastar_spec.rb ├── Gemfile.lock ├── virastar.gemspec ├── LICENSE └── README.md /TODO: -------------------------------------------------------------------------------- 1 | - translate to js -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | pkg/* 2 | *.gem 3 | .bundle 4 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler' 2 | Bundler::GemHelper.install_tasks 3 | -------------------------------------------------------------------------------- /lib/virastar/version.rb: -------------------------------------------------------------------------------- 1 | module Virastar 2 | VERSION = "0.0.6" 3 | end 4 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "http://rubygems.org" 2 | 3 | # Specify your gem's dependencies in virastar.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift(File.dirname(__FILE__)) 2 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 3 | 4 | require 'virastar' 5 | require 'rspec' 6 | require 'rspec/autorun' 7 | 8 | RSpec.configure do |config| 9 | 10 | end -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: . 
3 | specs: 4 | virastar (0.0.6) 5 | 6 | GEM 7 | remote: http://rubygems.org/ 8 | specs: 9 | diff-lcs (1.1.2) 10 | rspec (2.1.0) 11 | rspec-core (~> 2.1.0) 12 | rspec-expectations (~> 2.1.0) 13 | rspec-mocks (~> 2.1.0) 14 | rspec-core (2.1.0) 15 | rspec-expectations (2.1.0) 16 | diff-lcs (~> 1.1.2) 17 | rspec-mocks (2.1.0) 18 | 19 | PLATFORMS 20 | ruby 21 | 22 | DEPENDENCIES 23 | bundler (~> 1.0.0) 24 | rspec (~> 2.1.0) 25 | virastar! 26 | -------------------------------------------------------------------------------- /virastar.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | $:.push File.expand_path("../lib", __FILE__) 3 | require "virastar/version" 4 | 5 | Gem::Specification.new do |s| 6 | s.name = "virastar" 7 | s.version = Virastar::VERSION 8 | s.platform = Gem::Platform::RUBY 9 | s.authors = ["Allen A. Bargi"] 10 | s.email = ["allen.bargi@gmail.com"] 11 | s.homepage = "http://github.com/aziz/virastar" 12 | s.summary = %q{cleanning up Persian text!} 13 | s.description = %q{cleanning up Persian text!} 14 | s.files = `git ls-files`.split("\n") 15 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 16 | s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } 17 | s.require_paths = ["lib"] 18 | s.rubyforge_project = "virastar" 19 | s.extra_rdoc_files = [ "LICENSE", "README.md"] 20 | s.rdoc_options = ["--charset=UTF-8"] 21 | s.add_development_dependency(%q, ["~> 2.1.0"]) 22 | s.add_development_dependency(%q, ["~> 1.0.0"]) 23 | end 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Allen A. 
Bargi 2 | 3 | Permission is hereby granted, free of charge, to any person ob- 4 | taining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without restric- 6 | tion, including without limitation the rights to use, copy, modi- 7 | fy, merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is fur- 9 | nished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 16 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONIN- 17 | FRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ----- 2 | #ویراستار 3 | نوشته‌های فارسی شما را ویرایش می‌کند 4 | 5 | ----- 6 | Virastar (in Persian:ویراستار) 7 | 8 | 9 | ## Specifications 10 | 11 | ###Virastar 12 | * should add persian_cleanup method to String class 13 | * should replace Arabic kaf with its Persian equivalent 14 | * should replace Arabic Yeh with its Persian equivalent 15 | * should replace Arabic numbers with their Persian equivalent 16 | * should replace English numbers with their Persian equivalent 17 | * should replace English comma and semicolon with their Persian equivalent 18 | * should correct :;,.?! 
spacing (one space after and no space before) 19 | * should replace English quotes with their Persian equivalent 20 | * should replace three dots with ellipsis 21 | * should convert ه ی to هٔ 22 | * should replace double dash to ndash and triple dash to mdash 23 | * should replace more than one space with just a single one 24 | * should remove unnecessary zwnj chars that are succeeded/preceded by a space 25 | * should fix spacing for () [] {} “” «» (one space outside, no space inside) 26 | * should replace English percent sign to its Persian equivalent 27 | * should replace more that one line breaks with just one 28 | * should not replace line breaks 29 | * should put zwnj between word and prefix/suffix (ha haye* tar* tarin mi* nemi*) 30 | * should not replace English numbers in English phrases 31 | * should not destroy urls in the text 32 | 33 | #### aggressive editing 34 | * should replace more than one ! or ? mark with just one 35 | * should remove all kashidas 36 | 37 | ----- 38 | ## Install 39 | gem install virastar 40 | 41 | ## Usage 42 | "فارسي را كمی درست تر می نويسيم".persian_cleanup # => "فارسی را کمی درست‌تر می‌نویسیم" 43 | 44 | virastar comes with a list of flags to control its behavior, all flags are turned on by default but you can 45 | turn them off by passing an options hash to the `persian_cleanup` method 46 | 47 | "سلام 123".persian_cleanup(:fix_english_numbers => false) # => "سلام 123" 48 | 49 | here is the list of all flags: 50 | 51 | * `fix_dashes` 52 | * `fix_three_dots` 53 | * `fix_english_quotes` 54 | * `fix_hamzeh` 55 | * `cleanup_zwnj` 56 | * `fix_spacing_for_braces_and_quotes` 57 | * `fix_arabic_numbers` 58 | * `fix_english_numbers` 59 | * `fix_misc_non_persian_chars` 60 | * `fix_perfix_spacing` 61 | * `fix_suffix_spacing` 62 | * `aggresive` 63 | * `cleanup_kashidas` 64 | * `cleanup_extra_marks` 65 | * `cleanup_spacing` 66 | * `cleanup_begin_and_end` 67 | 68 | ## Acknowledgment 69 | Virastar is highly inspired by 
# -*- encoding: utf-8 -*-
if RUBY_VERSION.to_f < 1.9
  require 'jcode'
  $KCODE = 'u'
end

# Virastar cleans up Persian text: it normalises Arabic/English characters to
# their Persian equivalents and fixes spacing around punctuation, brackets,
# prefixes and suffixes.
module Virastar

  # Runs a fixed sequence of clean-up passes over a piece of text.
  #
  # Every pass is guarded by a boolean flag (see FLAGS). All flags default to
  # true; pass `flag => false` in the options hash to switch a pass off.
  class PersianEditor
    # Names of all supported option flags. The spellings `fix_perfix_spacing`
    # and `aggresive` are part of the public interface and are kept as-is.
    FLAGS = [
      :fix_dashes,
      :fix_three_dots,
      :fix_english_quotes,
      :fix_hamzeh,
      :cleanup_zwnj,
      :fix_spacing_for_braces_and_quotes,
      :fix_arabic_numbers,
      :fix_english_numbers,
      :fix_misc_non_persian_chars,
      :fix_perfix_spacing,
      :fix_suffix_spacing,
      :aggresive,
      :cleanup_kashidas,
      :cleanup_extra_marks,
      :cleanup_spacing,
      :cleanup_begin_and_end
    ].freeze

    # Zero-width non-joiner (U+200C). Built with pack so that the character is
    # visible in the source and the code still loads on Ruby 1.8.
    ZWNJ = [0x200c].pack('U').freeze

    URL_PATTERN     = /https?:\/\/([-\w\.]+)+(:\d+)?(\/([\w\/_\.]*(\?\S+)?)?)?/
    URL_PLACEHOLDER = /__urls__\d+__/

    PERSIAN_NUMBERS = "۱۲۳۴۵۶۷۸۹۰".freeze
    ARABIC_NUMBERS  = "١٢٣٤٥٦٧٨٩٠".freeze
    ENGLISH_NUMBERS = "1234567890".freeze
    BAD_CHARS       = ",;كي%".freeze
    GOOD_CHARS      = "،؛کی٪".freeze

    # Latin words (2+ letters, dash or underscore) glued to Persian digits.
    LATIN_WITH_DIGITS = /([a-zA-Z\-_]{2,}[۰-۹]+|[۰-۹]+[a-zA-Z\-_]{2,})/i

    HAMZEH       = /(\S)(ه[\s#{ZWNJ}]+[یي])(\s)/
    STRAY_ZWNJ   = /\s+#{ZWNJ}|#{ZWNJ}\s+/
    PREFIX       = /\s+(ن?می)\s+/
    SUFFIX       = /\s+(تر(ی(ن)?)?|ها(ی)?)\s+/ # \s+(tar(i(n)?)?|ha(ye)?)\s+
    PUNCTUATION  = /[ \t#{ZWNJ}]*([:;,؛،.؟!]{1})[ \t#{ZWNJ}]*/
    TIME_COLON   = /([۰-۹]+):\s+([۰-۹]+)/
    AFTER_BREAKS = /([\n]+)[ \t#{ZWNJ}]*/

    # Outside + inside spacing for () [] {} “” «»
    BRACKETS_WITH_OUTER_SPACE = [
      /[ \t#{ZWNJ}]*(\()\s*([^)]+?)\s*?(\))[ \t#{ZWNJ}]*/,
      /[ \t#{ZWNJ}]*(\[)\s*([^\]]+?)\s*?(\])[ \t#{ZWNJ}]*/,
      /[ \t#{ZWNJ}]*(\{)\s*([^}]+?)\s*?(\})[ \t#{ZWNJ}]*/,
      /[ \t#{ZWNJ}]*(“)\s*([^”]+?)\s*?(”)[ \t#{ZWNJ}]*/,
      /[ \t#{ZWNJ}]*(«)\s*([^»]+?)\s*?(»)[ \t#{ZWNJ}]*/
    ].freeze

    # Inside spacing only for () [] {} “” «»
    BRACKETS_INNER_SPACE = [
      /(\()\s*([^)]+?)\s*?(\))/,
      /(\[)\s*([^\]]+?)\s*?(\])/,
      /(\{)\s*([^}]+?)\s*?(\})/,
      /(“)\s*([^”]+?)\s*?(”)/,
      /(«)\s*([^»]+?)\s*?(»)/
    ].freeze

    # text    - the String to clean up (it is never modified).
    # options - Hash of flag => boolean; any flag from FLAGS that is missing
    #           or nil keeps its default of true.
    def initialize(text, options = {})
      @text = text
      options ||= {}
      FLAGS.each do |flag|
        value = options[flag]
        # `options[flag] || true` would turn an explicit false back into true,
        # so only fall back to the default when no value was given at all.
        instance_variable_set("@#{flag}", value.nil? ? true : value)
      end
    end

    # Runs all enabled passes and returns the cleaned-up text as a new String.
    def cleanup
      # work on a copy so the caller's string is left untouched
      text = @text.dup

      # removing URLs, bringing them back at the end of the process
      urls = hide_urls(text)

      # replace double dash to ndash and triple dash to mdash
      if @fix_dashes
        text.gsub!(/-{3}/, '—')
        text.gsub!(/-{2}/, '–')
      end

      # replace three dots with ellipsis
      text.gsub!(/\s*\.{3,}/, '…') if @fix_three_dots

      # replace English quotes with their Persian equivalent
      text.gsub!(/(["'`]+)(.+?)(\1)/, '«\2»') if @fix_english_quotes

      # convert "heh + (space or zwnj) + yeh" to heh with hamza above
      text.gsub!(HAMZEH, '\1هٔ\3') if @fix_hamzeh

      # remove unnecessary zwnj chars that are succeeded/preceded by a space
      text.gsub!(STRAY_ZWNJ, ' ') if @cleanup_zwnj

      # character replacement
      text.tr!(ENGLISH_NUMBERS, PERSIAN_NUMBERS) if @fix_english_numbers
      text.tr!(ARABIC_NUMBERS, PERSIAN_NUMBERS) if @fix_arabic_numbers
      text.tr!(BAD_CHARS, GOOD_CHARS) if @fix_misc_non_persian_chars

      # should not replace English numbers in English phrases; this also turns
      # the digits of the URL placeholders back into ASCII digits
      text.gsub!(LATIN_WITH_DIGITS) do |phrase|
        phrase.tr(PERSIAN_NUMBERS, ENGLISH_NUMBERS)
      end

      # put zwnj between word and prefix (mi* nemi*)
      # there's a possible bug here: the prefix words could be separate nouns
      text.gsub!(PREFIX, ' \1' + ZWNJ) if @fix_perfix_spacing

      # put zwnj between word and suffix (*tar *tarin *ha *haye)
      # there's a possible bug here: the suffix words could be separate nouns
      text.gsub!(SUFFIX, ZWNJ + '\1 ') if @fix_suffix_spacing

      # -- Aggressive Editing ------------------------------------------
      if @aggresive
        # replace more than one ! or ؟ mark with just one
        if @cleanup_extra_marks
          text.gsub!(/(!){2,}/, '\1')
          text.gsub!(/(؟){2,}/, '\1')
        end

        # should remove all kashida
        text.gsub!(/ـ+/, "") if @cleanup_kashidas
      end
      # ----------------------------------------------------------------

      if @fix_spacing_for_braces_and_quotes
        # one space outside, no space inside for () [] {} “” «»
        BRACKETS_WITH_OUTER_SPACE.each { |pattern| text.gsub!(pattern, ' \1\2\3 ') }

        # : ; , . ! ? and their Persian equivalents should have one space
        # after and no space before
        text.gsub!(PUNCTUATION, '\1 ')
        # do not put space after colon that separates time parts
        text.gsub!(TIME_COLON, '\1:\2')

        # the punctuation pass may have re-added spaces inside brackets
        BRACKETS_INNER_SPACE.each { |pattern| text.gsub!(pattern, '\1\2\3') }
      end

      # should replace more than one space with just a single one
      if @cleanup_spacing
        text.gsub!(/[ ]+/, ' ')
        text.gsub!(AFTER_BREAKS, '\1')
      end

      # remove spaces, tabs, and new lines from the beginning and end of text
      text.strip! if @cleanup_begin_and_end

      restore_urls(text, urls)
      text
    end

    private

    # Replaces each URL in text (in place) with a numbered placeholder and
    # returns the list of removed URLs in order of appearance.
    def hide_urls(text)
      urls = []
      text.gsub!(URL_PATTERN) do |url|
        urls << url.dup
        "__urls__#{urls.size}__"
      end
      urls
    end

    # Puts the URLs collected by hide_urls back in place of their placeholders.
    # A placeholder-looking token with no matching URL is left unchanged.
    def restore_urls(text, urls)
      text.gsub!(URL_PLACEHOLDER) do |placeholder|
        index = placeholder.split("__").last.to_i - 1
        (index >= 0 && urls[index]) || placeholder
      end
    end

  end
end

# Mixed into String to provide String#persian_cleanup.
# (The module name's spelling is kept because it is a public constant.)
module VirastarStingExtensions
  # Returns a cleaned-up copy of the string; the receiver is not modified.
  #
  # options - Hash of Virastar::PersianEditor flags, e.g.
  #           :fix_english_numbers => false
  def persian_cleanup(options = {})
    Virastar::PersianEditor.new(self, options).cleanup
  end
end

String.send(:include, VirastarStingExtensions)
spacing (one space after and no space before)" do 48 | test = "گفت : سلام" 49 | result = "گفت: سلام" 50 | test2 = "salam.\n\nkhoobi" 51 | result2 = "salam. \n\nkhoobi" 52 | test.persian_cleanup.should == result 53 | test2.persian_cleanup.should == result2 54 | end 55 | 56 | it "should replace English quotes with their Persian equivalent" do 57 | test = "''تست''" 58 | test2 = "'تست'" 59 | test3 = "\"گفت: سلام\"" 60 | test4 = "`تست`" 61 | test5 = "``تست``" 62 | result = result2 = result4 = result5 = "«تست»" 63 | result3 = "«گفت: سلام»" 64 | # not greedy 65 | test6 = '"this" or "that"' 66 | result6 = '«this» or «that»' 67 | test.persian_cleanup.should == result 68 | test2.persian_cleanup.should == result2 69 | test3.persian_cleanup.should == result3 70 | test4.persian_cleanup.should == result4 71 | test5.persian_cleanup.should == result5 72 | test6.persian_cleanup.should == result6 73 | end 74 | 75 | it "should replace three dots with ellipsis" do 76 | test = "..." 77 | result = "…" 78 | test2 = "...." 79 | result2 = "…" 80 | test3 = "خداحافظ ... به به" 81 | result3 = "خداحافظ… به به" 82 | test4 = "........." 83 | result4 = "…" 84 | test.persian_cleanup.should == result 85 | test2.persian_cleanup.should == result2 86 | test3.persian_cleanup.should == result3 87 | test4.persian_cleanup.should == result4 88 | end 89 | 90 | it "should convert ه ی to هٔ" do 91 | test = "خانه ی ما" 92 | test2 = "خانه ی ما" 93 | test3 = "خانه ي ما" 94 | result = result2 = result3 = "خانهٔ ما" 95 | test.persian_cleanup.should == result 96 | test2.persian_cleanup.should == result2 97 | test3.persian_cleanup.should == result3 98 | end 99 | 100 | it "should replace double dash to ndash and triple dash to mdash" do 101 | test = "--" 102 | test2 = "---" 103 | result = "–" 104 | result2 = "—" 105 | test.persian_cleanup.should == result 106 | test2.persian_cleanup.should == result2 107 | end 108 | 109 | it "should replace more than one space with just a single one" do 110 | test = " hello world! 
I'm virastar " 111 | result = "hello world! I'm virastar" 112 | test.persian_cleanup.should == result 113 | end 114 | 115 | it "should remove unnecessary zwnj chars that are succeeded/preceded by a space" do 116 | test = "سلام‌ دنیا" # before 117 | result = "سلام دنیا" 118 | test2 = "سلام ‌دنیا" #after 119 | result2 = "سلام دنیا" 120 | test.persian_cleanup.should == result 121 | test2.persian_cleanup.should == result2 122 | end 123 | 124 | it "should fix spacing for () [] {} “” «» (one space outside, no space inside)" do 125 | # matched brackets 126 | [ ["(",")"],["[","]"],["{","}"],["“","”"],["«","»"] ].each do |b| 127 | test = "this is#{b[0]} a test#{b[1]}" 128 | test2 = "this is #{b[0]} a test #{b[1]}" 129 | test3 = "this is #{b[0]} a test #{b[1]} yeah!" 130 | test4 = "this is #{b[0]}a test #{b[1]} yeah!" 131 | result = "this is #{b[0]}a test#{b[1]}" 132 | result2 = "this is #{b[0]}a test#{b[1]}" 133 | result3 = "this is #{b[0]}a test#{b[1]} yeah!" 134 | result4 = "this is #{b[0]}a test#{b[1]} yeah!" 135 | test.persian_cleanup.should == result 136 | test2.persian_cleanup.should == result2 137 | test3.persian_cleanup.should == result3 138 | test4.persian_cleanup.should == result4 139 | end 140 | 141 | # mismatched brackets 142 | [ ["(","]"],["[",")"],["{","”"],["(","}"],["«","]"] ].each do |b| 143 | test = "mismatched brackets#{b[0]} don't apply#{b[1]}" 144 | test2 = "mismatched brackets #{b[0]} don't apply #{b[1]}" 145 | test3 = "mismatched brackets #{b[0]} don't apply #{b[1]} yeah!" 146 | test4 = "mismatched brackets #{b[0]}don't apply #{b[1]} yeah!" 
147 | test.persian_cleanup.should == test 148 | test2.persian_cleanup.should == test2 149 | test3.persian_cleanup.should == test3 150 | test4.persian_cleanup.should == test4 151 | end 152 | 153 | end 154 | 155 | it "should replace English percent sign to its Persian equivalent" do 156 | test = "%" 157 | result = "٪" 158 | test.persian_cleanup.should == result 159 | end 160 | 161 | it "should replace more that one line breaks with just one" do 162 | test = "this is \n \n \n \n a test" 163 | result = "this is \n\n\n\na test" 164 | test2 = "this is\n\n\n\na test" 165 | result2 = "this is\n\n\n\na test" 166 | test3 = "this is \n\n\n a test" 167 | result3 = "this is \n\n\na test" 168 | 169 | test.persian_cleanup.should == result 170 | test2.persian_cleanup.should == result2 171 | test3.persian_cleanup.should == result3 172 | end 173 | 174 | it "should not replace line breaks and should remove spaces after line break" do 175 | test = "this is \n a test" 176 | result = "this is \na test" 177 | test.persian_cleanup.should == result 178 | end 179 | 180 | it "should put zwnj between word and prefix/suffix (ha haye* tar* tarin mi* nemi*)" do 181 | test = "ما می توانیم" 182 | result = "ما می‌توانیم" 183 | test2 = "ما نمی توانیم" 184 | result2 = "ما نمی‌توانیم" 185 | test3 = "این بهترین کتاب ها است" 186 | result3 = "این بهترین کتاب‌ها است" 187 | test4 = "بزرگ تری و قدرتمند ترین زبان های دنیا" 188 | result4 = "بزرگ‌تری و قدرتمند‌ترین زبان‌های دنیا" 189 | test.persian_cleanup.should == result 190 | end 191 | 192 | it "should not replace English numbers in English phrases" do 193 | test = "عزیز ATM74 در IBM-96 085 B 95BCS" 194 | result = "عزیز ATM74 در IBM-96 ۰۸۵ B 95BCS" 195 | test.persian_cleanup.should == result 196 | end 197 | 198 | it "should not create spacing for something like (,)" do 199 | test = "this is (,) comma" 200 | result = "this is (،) comma" 201 | test.persian_cleanup.should == result 202 | end 203 | 204 | it "should not puts space after time colon separator" do 
205 | test = "12:34" 206 | result = "۱۲:۳۴" 207 | test.persian_cleanup.should == result 208 | end 209 | 210 | it "should not destroy URLs" do 211 | test = "http://virastar.heroku.com" 212 | result = "http://virastar.heroku.com" 213 | test2 = "http://virastar.heroku.com\nhttp://balatarin.com" 214 | result2 = "http://virastar.heroku.com\nhttp://balatarin.com" 215 | test.persian_cleanup.should == result 216 | test2.persian_cleanup.should == result2 217 | end 218 | 219 | it "should not replace line breaks when the line ends with quotes" do 220 | test = "salam \"khoobi\" \n chetori" 221 | result = "salam «khoobi» \nchetori" 222 | test.persian_cleanup.should == result 223 | end 224 | 225 | it "should not put space after quotes, {}, () or [] if there's ,.; just after that" do 226 | test = "«This», {this}, (this), [this] or {this}. sometimes (this)." 227 | result = "«This»، {this}، (this)، [this] or {this}. sometimes (this)." 228 | test.persian_cleanup.should == result 229 | end 230 | 231 | it "should be able to convert numbers with dashes" do 232 | test = "1- salam" 233 | result = "۱- salam" 234 | test.persian_cleanup.should == result 235 | end 236 | 237 | context "aggressive editing" do 238 | it "should replace more than one ! or ? mark with just one" do 239 | test = "salam!!!" 240 | result = "salam!" 241 | test2 = "چطور؟؟؟" 242 | result2 = "چطور؟" 243 | test.persian_cleanup.should == result 244 | test2.persian_cleanup.should == result2 245 | end 246 | 247 | it "should remove all kashida" do 248 | test = "سلامـــت" 249 | result = "سلامت" 250 | test.persian_cleanup.should == result 251 | end 252 | 253 | it "should correct wrong connections like in میشود or میدهد" 254 | end 255 | 256 | end 257 | --------------------------------------------------------------------------------