├── README.md ├── assyst ├── README.md └── assyst.rb ├── bayes ├── README.md ├── naive_bayes.rb └── stop_words.list ├── dungeon └── dungeon.rb ├── markov ├── markov.rb └── styles.txt ├── oslab ├── README.md ├── disk_scheduling │ ├── cscan.rb │ ├── fcfs.rb │ ├── look.rb │ ├── scan.rb │ └── sstf.rb ├── memory_management │ ├── best_fit.rb │ ├── first_fit.rb │ └── worst_fit.rb └── page_replacement │ ├── cache.db │ ├── cache.db.p │ ├── fcfs.rb │ ├── lru.rb │ └── optimal.rb ├── practice └── deficient.rb ├── rsa ├── README.md ├── rsa.rb └── rsa_client.rb └── spellcheck ├── sanitize.rb ├── sanitized.txt └── spellcheck.rb /README.md: -------------------------------------------------------------------------------- 1 | # Ruby Experiments 2 | **** 3 | A few trivial Ruby projects for gaining a better understanding of the language and its idioms. 4 | 5 | >Copyright 2015 [Pawan Dubey](http://pawandubey.com) 6 | > 7 | >Licensed under the Apache License, Version 2.0 (the "License"); 8 | >you may not use this file except in compliance with the License. 9 | >You may obtain a copy of the License at 10 | > 11 | > http://www.apache.org/licenses/LICENSE-2.0 12 | > 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | -------------------------------------------------------------------------------- /assyst/README.md: -------------------------------------------------------------------------------- 1 | # Assyst - Rubu Experiment #1 2 | A crawler to fetch data from [MIT Manipal Web Portal](http://websismit.manipal.edu) 3 | -------------------------------------------------------------------------------- /assyst/assyst.rb: -------------------------------------------------------------------------------- 1 | require 'mechanize' 2 | 3 | REG_NO = '130905434' 4 | BIRTH_DATE = '1994-09-15' #YYYY-MM-DD 5 | 6 | agent = Mechanize.new 7 | 8 | begin 9 | homepage = agent.get('http://websismit.manipal.edu/websis/control/main') 10 | login_form = homepage.form_with(:name => 'loginform') 11 | login_form.idValue = REG_NO 12 | login_form.birthDate = BIRTH_DATE 13 | login_form.birthDate_i18n = BIRTH_DATE 14 | 15 | signed_in = agent.submit(login_form, login_form.buttons.first) 16 | 17 | fail SocketError if signed_in.nil? 18 | 19 | #signed_in 20 | puts signed_in.search('.loginbar').text 21 | acad_status = signed_in.link_with(text: 'Academic Status') 22 | 23 | fail SocketError if acad_status.nil? 24 | 25 | acad_status = acad_status.click 26 | 27 | if acad_status.nil? 28 | fail SocketError 29 | else 30 | attendance = acad_status.search('table#ListAttendanceSummary_table > tr').each do |row| 31 | row.search('td>span')[1..-1].each do |data| 32 | print data.text.strip + "\t" 33 | end 34 | puts 35 | end 36 | end 37 | 38 | rescue StandardError => e 39 | abort "There was an error while connecting to the site, please check your connection and try again : \n #{e.message}" 40 | end 41 | -------------------------------------------------------------------------------- /bayes/README.md: -------------------------------------------------------------------------------- 1 | # Naive Bayes Classifier in Ruby 2 | 3 | The naive Bayes classifier uses conditional probablity to classify text into multiple categories. 4 | This implementation includes Laplace smoothing and log space probability to avoid floating point underflow. 5 | -------------------------------------------------------------------------------- /bayes/naive_bayes.rb: -------------------------------------------------------------------------------- 1 | require 'lingua/stemmer' 2 | require 'set' 3 | 4 | module Bayes 5 | class Base 6 | attr_accessor :stemmer, :stop_words 7 | 8 | def initialize 9 | @stemmer = Lingua::Stemmer.new 10 | @stop_words = File.read("stop_words.list").split(/\W+/) 11 | end 12 | 13 | def tokenize(text) 14 | if block_given? 15 | text.split(/\W+/).each do |word| 16 | yield(@stemmer.stem(word)) unless @stop_words.include? word 17 | end 18 | end 19 | end 20 | end 21 | 22 | class Naive < Bayes::Base 23 | attr_accessor :model, :categories 24 | 25 | def initialize 26 | @model = Hash.new { |h,k| h[k] = Hash.new(0) } 27 | @categories = Set.new 28 | super 29 | end 30 | 31 | def classify(text) 32 | cat_weight = Math.log(1.0 / @categories.size) 33 | ratings = {} 34 | 35 | @categories.each do |category| 36 | features_weight = 0.0 37 | tokenize(text) do |word| 38 | features_weight += Math.log get_word_weight(word, category) 39 | end 40 | ratings[category] = cat_weight + features_weight 41 | end 42 | 43 | ratings 44 | end 45 | 46 | def train(category, data) 47 | @categories << category.to_sym 48 | 49 | tokenize(data) do |word| 50 | @model[word][category.to_sym] += 1 51 | end 52 | end 53 | 54 | def get_word_count(word, category=nil) 55 | if category.nil? 56 | @model.fetch(word).reduce(0) do |sum, (_, v)| 57 | sum += v 58 | end 59 | elsif @categories.include? category.to_sym 60 | @model.fetch(word)[category.to_sym] || 0 61 | end 62 | rescue 63 | 0 64 | end 65 | 66 | def get_word_weight(word, category) 67 | total_word_count = get_word_count(word) 68 | total_word_count = 1 if total_word_count == 0 69 | (get_word_count(word, category).to_f + 1) / (total_word_count + @model.keys.size) 70 | end 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /bayes/stop_words.list: -------------------------------------------------------------------------------- 1 | a 2 | about 3 | above 4 | across 5 | after 6 | afterwards 7 | again 8 | against 9 | all 10 | almost 11 | alone 12 | along 13 | already 14 | also 15 | although 16 | always 17 | am 18 | among 19 | amongst 20 | amoungst 21 | amount 22 | an 23 | and 24 | another 25 | any 26 | anyhow 27 | anyone 28 | anything 29 | anyway 30 | anywhere 31 | are 32 | around 33 | as 34 | at 35 | back 36 | be 37 | became 38 | because 39 | become 40 | becomes 41 | becoming 42 | been 43 | before 44 | beforehand 45 | behind 46 | being 47 | below 48 | beside 49 | besides 50 | between 51 | beyond 52 | bill 53 | both 54 | bottom 55 | but 56 | by 57 | call 58 | can 59 | cannot 60 | cant 61 | co 62 | computer 63 | con 64 | could 65 | couldnt 66 | cry 67 | de 68 | describe 69 | detail 70 | do 71 | done 72 | down 73 | due 74 | during 75 | each 76 | eg 77 | eight 78 | either 79 | eleven 80 | else 81 | elsewhere 82 | empty 83 | enough 84 | etc 85 | even 86 | ever 87 | every 88 | everyone 89 | everything 90 | everywhere 91 | except 92 | few 93 | fifteen 94 | fify 95 | fill 96 | find 97 | fire 98 | first 99 | five 100 | for 101 | former 102 | formerly 103 | forty 104 | found 105 | four 106 | from 107 | front 108 | full 109 | further 110 | get 111 | give 112 | go 113 | had 114 | has 115 | hasnt 116 | have 117 | he 118 | hence 119 | her 120 | here 121 | hereafter 122 | hereby 123 | herein 124 | hereupon 125 | hers 126 | herself 127 | him 128 | himself 129 | his 130 | how 131 | however 132 | hundred 133 | i 134 | ie 135 | if 136 | in 137 | inc 138 | indeed 139 | interest 140 | into 141 | is 142 | it 143 | its 144 | itself 145 | keep 146 | last 147 | latter 148 | latterly 149 | least 150 | less 151 | ltd 152 | made 153 | many 154 | may 155 | me 156 | meanwhile 157 | might 158 | mill 159 | mine 160 | more 161 | moreover 162 | most 163 | mostly 164 | move 165 | much 166 | must 167 | my 168 | myself 169 | name 170 | namely 171 | neither 172 | never 173 | nevertheless 174 | next 175 | nine 176 | no 177 | nobody 178 | none 179 | noone 180 | nor 181 | not 182 | nothing 183 | now 184 | nowhere 185 | of 186 | off 187 | often 188 | on 189 | once 190 | one 191 | only 192 | onto 193 | or 194 | other 195 | others 196 | otherwise 197 | our 198 | ours 199 | ourselves 200 | out 201 | over 202 | own 203 | part 204 | per 205 | perhaps 206 | please 207 | put 208 | rather 209 | re 210 | same 211 | see 212 | seem 213 | seemed 214 | seeming 215 | seems 216 | serious 217 | several 218 | she 219 | should 220 | show 221 | side 222 | since 223 | sincere 224 | six 225 | sixty 226 | so 227 | some 228 | somehow 229 | someone 230 | something 231 | sometime 232 | sometimes 233 | somewhere 234 | still 235 | such 236 | system 237 | take 238 | ten 239 | than 240 | that 241 | the 242 | their 243 | them 244 | themselves 245 | then 246 | thence 247 | there 248 | thereafter 249 | thereby 250 | therefore 251 | therein 252 | thereupon 253 | these 254 | they 255 | thick 256 | thin 257 | third 258 | this 259 | those 260 | though 261 | three 262 | through 263 | throughout 264 | thru 265 | thus 266 | to 267 | together 268 | too 269 | top 270 | toward 271 | towards 272 | twelve 273 | twenty 274 | two 275 | un 276 | under 277 | until 278 | up 279 | upon 280 | us 281 | very 282 | via 283 | was 284 | we 285 | well 286 | were 287 | what 288 | whatever 289 | when 290 | whence 291 | whenever 292 | where 293 | whereafter 294 | whereas 295 | whereby 296 | wherein 297 | whereupon 298 | wherever 299 | whether 300 | which 301 | while 302 | whither 303 | who 304 | whoever 305 | whole 306 | whom 307 | whose 308 | why 309 | will 310 | with 311 | within 312 | without 313 | would 314 | yet 315 | you 316 | your 317 | yours 318 | yourself 319 | yourselves 320 | -------------------------------------------------------------------------------- /dungeon/dungeon.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pawandubey/ruby_experiments/22caebed09a4c981860826a4edfa395b2bfc9f2c/dungeon/dungeon.rb -------------------------------------------------------------------------------- /markov/markov.rb: -------------------------------------------------------------------------------- 1 | # make a random walker using markov chain 2 | # take a string as input and then walk it randomly 3 | # 1. Take string as input and then find successors for each word -> store in a hash 4 | # 2. Generate a random value and and seed word, then pick a random successor from its list of successors and keep doing it till N words are filled. 5 | 6 | class Markov 7 | 8 | def initialize 9 | @successors = Hash.new { |h,k| h[k] = [] } 10 | end 11 | 12 | def read_file(file) 13 | # File.foreach(file) do |line| 14 | # find_successors(line) 15 | #end 16 | contents = File.read(file) 17 | find_successors(contents) 18 | end 19 | 20 | def read_string(string) 21 | find_successors(string) 22 | end 23 | 24 | def generate(start, limit) 25 | #puts @successors 26 | random_walk(start, limit) 27 | end 28 | 29 | private 30 | 31 | def find_successors(string) 32 | words = string.split() 33 | 34 | words[0..-2].each.with_index do |word, i| 35 | @successors[word.to_sym] << words[i+1] 36 | end 37 | end 38 | 39 | def random_walk(start, limit) 40 | output = [] 41 | next_w = start 42 | while (limit-=1) > 0 43 | ind = (Random.rand * @successors[next_w.to_sym].size).to_i 44 | #puts @successors[next_w].size, limit 45 | next_w = @successors[next_w.to_sym][ind] 46 | output << next_w 47 | #puts next_w, ind 48 | end 49 | @outstring = output.join(" ") 50 | end 51 | 52 | end 53 | 54 | if __FILE__ == $0 55 | mk = Markov.new 56 | # mk.read_string("hello world baby girl how are you I am fine but not so much look at the stars look how they shine for you") 57 | mk.read_file("styles.txt") 58 | puts mk.generate("for", 50) 59 | end 60 | -------------------------------------------------------------------------------- /oslab/README.md: -------------------------------------------------------------------------------- 1 | #Operating System Algorithms 2 | 3 | These are some of the Algorithms discussed in the Operating System course at MIT, Manipal. Implementing because I have my lab exams tomorrow. :P 4 | -------------------------------------------------------------------------------- /oslab/disk_scheduling/cscan.rb: -------------------------------------------------------------------------------- 1 | #block access order 2 | blocks = [10, 19, 3, 14, 12, 9] 3 | start = 10 4 | time = 0 5 | blocks.sort!.rotate!(blocks.index(10)) 6 | #blocks.delete! 10 7 | for i in (1..blocks.size-1) 8 | if (blocks[i]) 9 | time += (blocks[i] - blocks[i-1]).abs 10 | puts (blocks[i] - blocks[i-1]).abs 11 | end 12 | end 13 | puts time.to_f/5 14 | -------------------------------------------------------------------------------- /oslab/disk_scheduling/fcfs.rb: -------------------------------------------------------------------------------- 1 | #block access order 2 | blocks = [10, 19, 3, 14, 12, 9] 3 | 4 | time = 0 5 | 6 | for i in (1..blocks.size-1) do 7 | val = (blocks[i] - blocks[i-1]).abs 8 | puts val 9 | time += val 10 | end 11 | 12 | puts time.to_f/5 13 | -------------------------------------------------------------------------------- /oslab/disk_scheduling/look.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pawandubey/ruby_experiments/22caebed09a4c981860826a4edfa395b2bfc9f2c/oslab/disk_scheduling/look.rb -------------------------------------------------------------------------------- /oslab/disk_scheduling/scan.rb: -------------------------------------------------------------------------------- 1 | #block access order 2 | blocks = [10, 19, 3, 14, 12, 9] 3 | 4 | mark = [] 5 | 6 | time = 0 7 | 8 | blocks.sort! 9 | start = blocks.index(10) 10 | blocks.rotate!(start) 11 | blocks[blocks.size-start..-1] = blocks[blocks.size-start..-1].reverse 12 | puts blocks 13 | blocks[1..-1].each.with_index do |b,i| 14 | time += (blocks[i]-blocks[i-1]).abs 15 | end 16 | 17 | puts time.to_f./5 18 | -------------------------------------------------------------------------------- /oslab/disk_scheduling/sstf.rb: -------------------------------------------------------------------------------- 1 | #block access order 2 | blocks = [19, 3, 14, 12, 9] 3 | #done = [10] 4 | 5 | time = 0 6 | start = 10 7 | # blocks.sort! { | a, b | (10 - a).abs <=> (10 - b).abs } 8 | # puts blocks 9 | while blocks.size > 0 10 | nextPos = 0 11 | for j in (0...blocks.size-1) 12 | if (start - blocks[j]).abs < (start - blocks[nextPos]).abs 13 | nextPos = j 14 | end 15 | end 16 | puts blocks[nextPos] 17 | time += (start - blocks[nextPos]).abs 18 | start = blocks[nextPos] 19 | blocks.delete blocks[nextPos] 20 | end 21 | 22 | puts time.to_f/5 23 | -------------------------------------------------------------------------------- /oslab/memory_management/best_fit.rb: -------------------------------------------------------------------------------- 1 | #size of processes in order 2 | processes = [212, 417, 112, 426] 3 | 4 | #size of memory segments in order 5 | memory = [100, 500, 200, 300, 600] 6 | 7 | #result array storing the final segment of process i 8 | result = [] 9 | 10 | memory.sort! 11 | puts memory 12 | processes.each do |proc| 13 | memory.each_with_index do |mem, index| 14 | if mem >= proc 15 | memory[index] -= proc 16 | result << index 17 | break 18 | end 19 | end 20 | end 21 | 22 | puts result 23 | -------------------------------------------------------------------------------- /oslab/memory_management/first_fit.rb: -------------------------------------------------------------------------------- 1 | #size of processes in order 2 | processes = [212, 417, 112, 426] 3 | 4 | #size of memory segments in order 5 | memory = [100, 500, 200, 300, 600] 6 | 7 | #result array storing the final segment of process i 8 | result = [] 9 | #place according to first fitting segment 10 | processes.each_with_index do |proc| 11 | memory.each_with_index do |mem, index| 12 | if mem >= proc 13 | memory[index] -= proc 14 | result << index 15 | break 16 | end 17 | end 18 | end 19 | 20 | puts result 21 | -------------------------------------------------------------------------------- /oslab/memory_management/worst_fit.rb: -------------------------------------------------------------------------------- 1 | #size of processes in order 2 | processes = [212, 417, 112, 426] 3 | 4 | #size of memory segments in order 5 | memory = [100, 500, 200, 300, 600] 6 | 7 | #result array storing the final segment of process i 8 | result = [] 9 | memory.sort!.reverse! 10 | 11 | puts memory 12 | processes.each do |proc| 13 | memory.each_with_index do |mem, index| 14 | if mem >= proc 15 | memory[index] -= proc 16 | result << index 17 | break 18 | end 19 | end 20 | end 21 | 22 | puts result 23 | -------------------------------------------------------------------------------- /oslab/page_replacement/cache.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pawandubey/ruby_experiments/22caebed09a4c981860826a4edfa395b2bfc9f2c/oslab/page_replacement/cache.db -------------------------------------------------------------------------------- /oslab/page_replacement/cache.db.p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pawandubey/ruby_experiments/22caebed09a4c981860826a4edfa395b2bfc9f2c/oslab/page_replacement/cache.db.p -------------------------------------------------------------------------------- /oslab/page_replacement/fcfs.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pawandubey/ruby_experiments/22caebed09a4c981860826a4edfa395b2bfc9f2c/oslab/page_replacement/fcfs.rb -------------------------------------------------------------------------------- /oslab/page_replacement/lru.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pawandubey/ruby_experiments/22caebed09a4c981860826a4edfa395b2bfc9f2c/oslab/page_replacement/lru.rb -------------------------------------------------------------------------------- /oslab/page_replacement/optimal.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pawandubey/ruby_experiments/22caebed09a4c981860826a4edfa395b2bfc9f2c/oslab/page_replacement/optimal.rb -------------------------------------------------------------------------------- /practice/deficient.rb: -------------------------------------------------------------------------------- 1 | input = [111, 112, 220, 69, 134, 85] 2 | result = [] 3 | 4 | input.each do |n| 5 | sum = (1..n).to_a.select { |i| n % i == 0 }.reduce(:+) 6 | 7 | result << if sum > (2 * n) 8 | "abundant by #{sum - (2 * n)}" 9 | elsif sum < (2 * n) 10 | "deficient" 11 | else 12 | "neither" 13 | end 14 | end 15 | 16 | puts result 17 | -------------------------------------------------------------------------------- /rsa/README.md: -------------------------------------------------------------------------------- 1 | #Toy RSA implementation and client 2 | 3 | This isn't meant for any serious encryption, and should be used for educational purposes ONLY. 4 | 5 | ##Usage 6 | (prefix everything with 'ruby', of course) 7 | 8 | `rsa_client gen` : generate public and private keys as a hash 9 | 10 | `rsa_client enc -n -e ` : encrypt given message with the public key parts 11 | 12 | `rsa_client dec -n -d ` : decrypt given secret with the private key parts 13 | 14 | ##License 15 | MIT License 16 | -------------------------------------------------------------------------------- /rsa/rsa.rb: -------------------------------------------------------------------------------- 1 | # The keys for the RSA algorithm are generated the following way: 2 | # 3 | # Choose two distinct prime numbers p and q. 4 | # For security purposes, the integers p and q should be chosen at random, 5 | # and should be similar in magnitude but 'differ in length by a 6 | # few digits'[2] to make factoring harder. Prime integers can be efficiently found using a primality test. 7 | # Compute n = pq. 8 | # n is used as the modulus for both the public and private keys. Its length, usually expressed in bits, is the key length. 9 | # Compute φ(n) = φ(p)φ(q) = (p − 1)(q − 1) = n - (p + q -1), where φ is Euler's totient function. This value is kept private. 10 | # Choose an integer e such that 1 < e < φ(n) and gcd(e, φ(n)) = 1; i.e., e and φ(n) are coprime. 11 | # e is released as the public key exponent. 12 | # e having a short bit-length and small Hamming weight results in more 13 | # efficient encryption – most commonly 216 + 1 = 65,537. However, much 14 | # smaller values of e (such as 3) have been shown to be less secure in some settings.[8] 15 | # Determine d as d ≡ e−1 (mod φ(n)); i.e., d is the modular multiplicative inverse of e (modulo φ(n)). 16 | # 17 | # This is more clearly stated as: solve for d given d⋅e ≡ 1 (mod φ(n)) 18 | # d is kept as the private key exponent. 19 | # 20 | # The public key consists of the modulus n and the public (or encryption) exponent e. The private key 21 | # consists of the modulus n and the private (or decryption) exponent d, which must be kept secret. p, q, 22 | # and φ(n) must also be kept secret because they can be used to calculate d. 23 | require 'prime' 24 | #require 'random' 25 | 26 | class RSA 27 | 28 | def encrypt(message, public_key) 29 | modular_power(message, public_key[:e], public_key[:n]) 30 | end 31 | 32 | def decrypt(secret, private_key) 33 | modular_power(secret, private_key[:d], private_key[:n]) 34 | end 35 | 36 | def generate_keys 37 | p = generate_random_prime 38 | q = generate_random_prime 39 | 40 | n = p * q 41 | totient = (p - 1) * (q - 1) 42 | 43 | e = find_coprime(totient) 44 | d = mod_inverse(e, totient) 45 | 46 | @public_key = { :n => n, :e => e } 47 | @private_key = { :n => n, :d => d } 48 | 49 | keys = { :public_key => @public_key, :private_key => @private_key } 50 | end 51 | 52 | private 53 | #modular exponentiation 54 | def modular_power(base, exponent, modulus) 55 | result = 1 56 | while exponent > 0 57 | if exponent % 2 == 1 58 | result = (result * base) % modulus 59 | end 60 | exponent >>= 1 61 | base = (base ** 2) % modulus 62 | end 63 | result 64 | end 65 | 66 | #Keeping the prime numbers in sane limits, else the CPU will blow off 67 | def generate_random_prime 68 | prng = Random.new 69 | p = prng.rand((1000000)..(100000000)) 70 | while(!Prime.prime?(p)) 71 | p = prng.rand((1000000)..(100000000)) 72 | end 73 | p 74 | end 75 | 76 | #Find a co-prime in the range 1...number-1 non-deterministically 77 | def find_coprime(number) 78 | prng = Random.new 79 | coprime = prng.rand(1..number-1) 80 | while number.gcd(coprime) != 1 81 | coprime = prng.rand(1..number-1) 82 | end 83 | coprime 84 | end 85 | 86 | # Modular inverse with Extended Euclidean Method, taken from Wikipedia 87 | def mod_inverse(a,m) 88 | t, newt, r, newr = [0, 1, m, a] 89 | 90 | while newr != 0 91 | quotient = r / newr 92 | t, newt = [newt, t - quotient * newt] 93 | r, newr = [newr, r - quotient * newr] 94 | end 95 | 96 | t < 0 ? t + m : t 97 | end 98 | 99 | end 100 | -------------------------------------------------------------------------------- /rsa/rsa_client.rb: -------------------------------------------------------------------------------- 1 | #Client script for the accompanying RSA implementation 2 | require_relative 'rsa' 3 | 4 | usage = <<-END 5 | RSA implementation client by Pawan Dubey, 2016 6 | Meant for educational purposes only. Should NEVER be used for any serious encryption. The creator bears no liability for any consequences of running this program. 7 | 8 | Usage: (prefix everything with 'ruby', of course) 9 | rsa_client gen : generate public and private keys as a hash 10 | rsa_client enc -n -e : encrypt given message with the public key parts 11 | rsa_client dec -n -d : decrypt given secret with the private key parts 12 | END 13 | 14 | exit if ARGV.size == 0 15 | 16 | cipher = RSA.new 17 | 18 | if ARGV.size == 1 && ARGV[0] == 'gen' 19 | keys = cipher.generate_keys 20 | puts "Keys: #{keys}" 21 | 22 | elsif ARGV.size == 1 && ARGV[0] != 'gen' 23 | puts usage 24 | 25 | elsif ARGV.size > 2 26 | if ARGV.include?('enc') 27 | message = ARGV[1].to_i 28 | n = ARGV [ARGV.index('-n') + 1].to_i 29 | e = ARGV[ARGV.index('-e') + 1].to_i 30 | 31 | puts cipher.encrypt(message,{ :n => n, :e => e }) 32 | 33 | elsif ARGV.include?('dec') 34 | secret = ARGV[1].to_i 35 | n = ARGV[ARGV.index('-n') + 1].to_i 36 | d = ARGV[ARGV.index('-d') + 1].to_i 37 | 38 | puts cipher.decrypt(secret, { :n => n, :d => d}) 39 | else 40 | puts "Invalid options. Try again." 41 | puts usage 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /spellcheck/sanitize.rb: -------------------------------------------------------------------------------- 1 | require 'find' 2 | 3 | path_to_corpus = '/home/pawandubey/repository/ruby_experiments/spellcheck/corpus' 4 | path_to_sanitized_file = 'sanitized.txt' 5 | 6 | File.delete(path_to_sanitized_file) if File.exist?(path_to_sanitized_file) 7 | 8 | File.open(path_to_sanitized_file, 'w') do |file| 9 | files_read = 0 10 | Find.find(path_to_corpus) do |path| 11 | begin 12 | next if FileTest.directory?(path) || File.basename(path)[0] == '.' 13 | text = File.read(path) 14 | file.puts text.gsub(/[^A-Za-z0-9\n ]/, '').gsub(/[\s|\n]+/, ' ').downcase 15 | files_read += 1 16 | rescue => e 17 | puts "Total #{files_read} files read. Error reading #{path}" 18 | puts e.message 19 | end 20 | end 21 | puts "Total #{files_read} files read." 22 | end 23 | -------------------------------------------------------------------------------- /spellcheck/spellcheck.rb: -------------------------------------------------------------------------------- 1 | class SpellCheck 2 | def correct(word) 3 | train_model 4 | 5 | prospects = find_similar(word) 6 | prospects 7 | corrections = find_closest(word, prospects) 8 | 9 | puts "possible corrections : #{corrections.join(' | ')}" 10 | end 11 | 12 | private 13 | def find_closest(word, prospects) 14 | samp_size = sample_size 15 | correction = prospects.min_by(5) {|v| edit_distance(word, v.to_s)} 16 | end 17 | 18 | def cond_probablity(key, word) 19 | 20 | end 21 | 22 | def sample_size 23 | @model.values.each.reduce(0) {|v,a| a+=v} 24 | end 25 | 26 | def find_similar(word) 27 | similars = @model.keys.find_all {|w| edit_distance(word, w.to_s) <= 2} 28 | end 29 | 30 | def train_model 31 | @path_to_sanitized_file = 'sanitized.txt' 32 | @model = Hash.new(1) 33 | 34 | File.foreach(@path_to_sanitized_file) do |line| 35 | line.split.each do |word| 36 | @model[word.to_sym] += 1 37 | end 38 | end 39 | end 40 | 41 | #Levenshtein Distance, taken from https://github.com/rubygems/rubygems/blob/master/lib/rubygems/text.rb 42 | def edit_distance(str1, str2) 43 | n = str1.length 44 | m = str2.length 45 | max = n/2 46 | 47 | return m if 0 == n 48 | return n if 0 == m || (n - m).abs > max 49 | 50 | d = (0..m).to_a 51 | x = nil 52 | str1.each_char.with_index do |char1,i| 53 | e = i+1 54 | 55 | str2.each_char.with_index do |char2,j| 56 | cost = (char1 == char2) ? 0 : 1 57 | x = [ d[j+1] + 1, # insertion 58 | e + 1, # deletion 59 | d[j] + cost # substitution 60 | ].min 61 | d[j] = e 62 | e = x 63 | end 64 | d[m] = x 65 | end 66 | x 67 | end 68 | end 69 | 70 | word = ARGV[0].to_s 71 | check = SpellCheck.new 72 | 73 | check.correct(word) 74 | --------------------------------------------------------------------------------