├── .rvmrc ├── .gitignore ├── lib ├── domo │ └── version.rb └── domo.rb ├── Gemfile ├── spec ├── spec_helper.rb └── domo_spec.rb ├── Rakefile ├── README.md └── domo.gemspec /.rvmrc: -------------------------------------------------------------------------------- 1 | rvm use 1.9.2@domo --create 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | .bundle 3 | Gemfile.lock 4 | pkg/* 5 | -------------------------------------------------------------------------------- /lib/domo/version.rb: -------------------------------------------------------------------------------- 1 | module Domo 2 | VERSION = "0.0.3" 3 | end 4 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "http://rubygems.org" 2 | 3 | # Specify your gem's dependencies in domo.rb.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require File.join(File.dirname(__FILE__), "..", "lib", "domo") 2 | 3 | RSpec.configure do |config| 4 | config.mock_with :rspec 5 | end 6 | 7 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/gem_tasks' 2 | require 'rspec/core/rake_task' 3 | RSpec::Core::RakeTask.new(:spec) do |spec| 4 | spec.rspec_opts = ['-fn --color'] 5 | end 6 | 7 | #task :default => :spec 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Domo.rb 2 | Utility functions for domain strings 3 | 4 | ### 
Installation 5 | gem install domo-rb 6 | 7 | 8 | ### Usage 9 | >> require 'domo' 10 | => 'true' 11 | 12 | >> Domo.canonicalize "www.ebay.com" 13 | => "ebay.com" 14 | 15 | >> Domo.canonicalize "boats.ebay.co.uk" 16 | => "ebay.co.uk" 17 | 18 | >> Domo.canonicalize "edition.cnn.com" 19 | => "cnn.com" 20 | 21 | >> Domo.strip_url "http://www.cnn.com/news/last-week/main.html" 22 | => "www.cnn.com" 23 | 24 | 25 | ### Testing Notes 26 | Install development dependencies with 27 | 28 | bundle install 29 | And run the tests with: 30 | 31 | rake spec 32 | 33 | 34 | Copyright (c) 2011 Gur Dotan, released under the MIT license. 35 | -------------------------------------------------------------------------------- /spec/domo_spec.rb: -------------------------------------------------------------------------------- 1 | require File.join(File.dirname(__FILE__),"spec_helper") 2 | 3 | describe "Domo" do 4 | 5 | it "should canonicalize 'www.ebay.com'" do 6 | Domo.canonicalize("www.ebay.com").should == "ebay.com" 7 | end 8 | 9 | it "should canonicalize 'motors.ebay.com'" do 10 | Domo.canonicalize("motors.ebay.com").should == "ebay.com" 11 | end 12 | 13 | it "should canonicalize 'www.ebay.co.uk'" do 14 | Domo.canonicalize("www.ebay.co.uk").should == "ebay.co.uk" 15 | end 16 | 17 | it "should canonicalize 'ebay.com'" do 18 | Domo.canonicalize("ebay.com").should == "ebay.com" 19 | end 20 | 21 | it "should strip 'http://www.ebay.com/shopping/1.html'" do 22 | Domo.strip_url("http://www.ebay.com/shopping/1.html").should == "www.ebay.com" 23 | end 24 | 25 | end 26 | -------------------------------------------------------------------------------- /domo.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | lib = File.expand_path('../lib/', __FILE__) 3 | $:.push(lib) unless $:.include?(lib) 4 | require 'rake/gempackagetask' 5 | require './lib/domo/version' 6 | 7 | 8 | Gem::Specification.new do |s| 9 | s.name = "domo-rb" 10 | 
# Utility functions for domain strings.
module Domo
  # Generic (non-country) top-level domain labels.
  GENERIC_TOP_LEVEL_DOMAINS = %w[
    aero asia biz com coop edu gov info int jobs mil mobi museum name net org
    pro tel travel xxx
  ].freeze

  # Country-code top-level domain labels.
  GEO_TOP_LEVEL_DOMAINS = %w[
    ac ad ae af ag ai al am an ao aq ar as at au aw ax az
    ba bb bd be bf bg bh bi bj bm bn bo br bs bt bv bw by bz
    ca cc cd cf cg ch ci ck cl cm cn co cr cs cu cv cx cy cz
    de dj dk dm do dz ec ee eg er es et eu fi fj fk fm fo fr
    ga gb gd ge gf gg gh gi gl gm gn gp gq gr gs gt gu gw gy
    hk hm hn hr ht hu id ie il im in io iq ir is it
    je jm jo jp ke kg kh ki km kn kp kr kw ky kz
    la lb lc li lk lr ls lt lu lv ly
    ma mc md me mg mh mk ml mm mn mo mp mq mr ms mt mu mv mw mx my mz
    na nc ne nf ng ni nl no np nr nu nz om
    pa pe pf pg ph pk pl pm pn pr ps pt pw py qa re ro rs ru rw
    sa sb sc sd se sg sh si sj sk sl sm sn so sr st su sv sy sz
    tc td tf tg th tj tk tl tm tn to tp tr tt tv tw tz
    ua ug uk us uy uz va vc ve vg vi vn vu wf ws ye yt za zm zw
  ].freeze

  # Not used in REGEX since both labels are already in the geo list
  # (co = Colombia, ac = Ascension Island).
  GEO_SPECIFIC_SECOND_LEVEL = %w[co ac].freeze

  # Matches a single, lower-case label that is one of the known TLD labels.
  # Anchored with \A / \z so a label containing a newline cannot match on
  # one of its lines only.
  REGEX = /\A(#{(GENERIC_TOP_LEVEL_DOMAINS + GEO_TOP_LEVEL_DOMAINS).join("|")})\z/

  # Returns a canonicalized domain for a given domain string: the run of
  # known TLD labels at the right-hand end plus the one label before it.
  # Anything after the first ":" (e.g. a port) is discarded and the result
  # is lower-cased, since the TLD lists are lower-case.
  #
  # Examples:
  #   "www.cnn.com"            => "cnn.com"
  #   "books.ebay.co.uk"       => "ebay.co.uk"
  #   "news.nytimes.com:3000"  => "nytimes.com"
  #   "WWW.EBAY.COM"           => "ebay.com"
  #   "" or nil                => ""
  #
  # NOTE: this is a heuristic. Every consecutive label from the right that
  # appears in the TLD lists is treated as part of the suffix, so
  # "www.tv.com" is returned unchanged ("tv" is also a geo TLD).
  #
  # @param domain [String, nil] host name, optionally followed by ":port"
  # @return [String] the canonicalized domain ("" for empty or nil input)
  def self.canonicalize(domain)
    # split(":") on an empty string yields [], hence the second to_s.
    host = domain.to_s.split(":").first.to_s.downcase
    labels = host.split(".").reverse

    # =~ yields an Integer (truthy, even 0) on a match and nil otherwise.
    suffix_size = labels.take_while { |label| label =~ REGEX }.size

    labels.first(suffix_size + 1).reverse.join(".")
  end

  # @deprecated Use {Domo.canonicalize} instead.
  #
  # Emits a deprecation notice on standard error (via Kernel#warn, so it does
  # not mix into the program's regular output) and delegates to canonicalize.
  #
  # @param domain [String, nil] see {Domo.canonicalize}
  # @return [String] see {Domo.canonicalize}
  def self.canonize(domain)
    warn "Domo#canonize deprecated. Please use Domo#canonicalize instead."
    canonicalize(domain)
  end

  # Extracts the part of a URL between "://" and the next "/", "?" or "#".
  # A port or user-info, if present, is part of the returned string.
  #
  # Examples:
  #   "http://www.cnn.com/news/last-week/main.html" => "www.cnn.com"
  #   "http://example.com?x=1"                      => "example.com"
  #   "no scheme here"                              => nil
  #
  # @param url [String, nil] the URL to inspect
  # @return [String, nil] the extracted host part, or nil when no "://"
  #   followed by at least one host character is found
  def self.strip_url(url)
    url.to_s[%r{://([^/?#]+)}, 1]
  end

  # Instance-level variant of Domo.strip_url, kept so that code which mixes
  # the module in (including the top-level include below) keeps working.
  #
  # @param url [String, nil] the URL to inspect
  # @return [String, nil] see {Domo.strip_url}
  def strip_url(url)
    Domo.strip_url(url)
  end
end

# Retained for backward compatibility: existing callers may rely on a bare
# `strip_url(...)` being available after `require "domo"`.
include Domo