├── .gitignore ├── LICENSE ├── README.md ├── Rakefile ├── bin └── gem-codesearch-setup ├── gem-codesearch.gemspec └── test ├── test_all.rb └── test_bin.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | /.config 4 | /coverage/ 5 | /InstalledFiles 6 | /pkg/ 7 | /spec/reports/ 8 | /test/tmp/ 9 | /test/version_tmp/ 10 | /tmp/ 11 | /latest-gem 12 | /mirror 13 | /log 14 | 15 | ## Specific to RubyMotion: 16 | .dat* 17 | .repl_history 18 | build/ 19 | 20 | ## Documentation cache and generated files: 21 | /.yardoc/ 22 | /_yardoc/ 23 | /doc/ 24 | /rdoc/ 25 | 26 | ## Environment normalisation: 27 | /.bundle/ 28 | /lib/bundler/man/ 29 | 30 | # for a library or gem, you might want to ignore these files since the code is 31 | # intended to run in multiple environments; otherwise, check them in: 32 | # Gemfile.lock 33 | # .ruby-version 34 | # .ruby-gemset 35 | 36 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: 37 | .rvmrc 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Tanaka Akira 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gem-codesearch 2 | 3 | gem-codesearch sets up a full text code search engine on a mirror of rubygems. 4 | It uses rubygems-mirror to mirror rubygems and codesearch as the search engine. 5 | 6 | 400GB of free disk space is required to store the rubygems mirror, 7 | unpacked gems and codesearch index as of 2016-01. 8 | It will be larger in the future. 9 | 10 | ## Usage 11 | 12 | This creates a "latest-gem" package in the zoekt index. 13 | 14 | # Install zoekt from GitHub repository: https://github.com/google/zoekt 15 | % gem install gem-codesearch 16 | % mkdir $HOME/gem-codesearch # Create a working directory 17 | % cd $HOME/gem-codesearch 18 | % gem-codesearch-setup all >& setup.log # It may take several days or more 19 | 20 | If "gem-codesearch-setup all" fails due to network or server errors, 21 | try again to continue. 22 | 23 | After the index is created, enjoy code search. 
24 | 25 | % zoekt -index_dir $HOME/gem-codesearch/zoekt-index sort_by 26 | 27 | You can search without the `-index_dir` option after creating a symlink 28 | named `.zoekt` in your HOME directory. 29 | 30 | ## Usage without install 31 | 32 | % sudo aptitude install codesearch # https://github.com/google/codesearch 33 | % gem install rubygems-mirror 34 | % cd $HOME 35 | % git clone https://github.com/akr/gem-codesearch.git 36 | % cd gem-codesearch 37 | % rake all >& setup.log # It may take several days or more 38 | 39 | ## Use milkode instead of zoekt 40 | 41 | % gem install milkode 42 | % milk init --default # If you use milkode for the first time 43 | % rake mirror unpack index_milkode >& setup.log 44 | 45 | ## Use codesearch instead of zoekt 46 | 47 | % rake mirror unpack index_csearch 48 | 49 | This creates a set of indexes for csearch in gem-codesearch/csearchindexes. 50 | Use a wrapper shell script to call csearch: 51 | 52 | ``` 53 | #!/bin/sh 54 | 55 | export CSEARCHINDEX 56 | for CSEARCHINDEX in /path/to/gem-codesearch/csearchindexes/?; do csearch "$@"; done 57 | ``` 58 | 59 | ## Links 60 | 61 | - https://github.com/akr/gem-codesearch 62 | - https://rubygems.org/gems/gem-codesearch 63 | 64 | ## Author 65 | 66 | Tanaka Akira 67 | akr@fsij.org 68 | 69 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'rbconfig' 2 | require 'fileutils' 3 | require 'find' 4 | require 'pp' 5 | 6 | task :default => :help 7 | 8 | task :help do 9 | puts <<'End' 10 | Usage: 11 | rake help 12 | rake all # mirror, unpack, index 13 | rake mirror 14 | rake unpack 15 | rake index # same as index_zoekt 16 | rake index_zoekt 17 | rake index_codesearch # same as index_zoekt (for compatibility) 18 | rake index_milkode 19 | rake index_csearch 20 | End 21 | end 22 | 23 | task :all => [:mirror, :unpack, :index] 24 | 25 | BASE_DIR = ENV['GEM_CODESEARCH_DIR'] || Dir.pwd 26 | 
MIRROR_URL = ENV['GEM_CODESEARCH_URL'] || 'http://rubygems.org/' 27 | 28 | MIRROR_DIR = "#{BASE_DIR}/mirror" 29 | LATEST_DIR = ENV['GEM_CODESEARCH_GEM_DIR'] || "#{BASE_DIR}/latest-gem" 30 | LOG_DIR = "#{BASE_DIR}/log" 31 | 32 | GEM_COMMAND = "#{RbConfig::CONFIG["bindir"]}/gem" 33 | MILK_COMMAND = "#{RbConfig::CONFIG["bindir"]}/milk" 34 | 35 | file "#{BASE_DIR}/.gem/.mirrorrc" do |t| 36 | FileUtils.mkpath File.dirname(t.name) 37 | File.write(t.name, <<"End") 38 | --- 39 | - from: #{MIRROR_URL} 40 | to: #{MIRROR_DIR} 41 | delete: true 42 | End 43 | end 44 | 45 | task :mirror => "#{BASE_DIR}/.gem/.mirrorrc" do 46 | FileUtils.mkpath MIRROR_DIR 47 | # HOME is set because gem mirror reads $HOME/.gem/.mirrorrc. 48 | env = {"HOME"=>BASE_DIR} 49 | sh env, GEM_COMMAND, "mirror", "--verbose" 50 | end 51 | 52 | task :unpack do 53 | FileUtils.mkpath LOG_DIR 54 | 55 | available_gems = {} 56 | Dir.foreach("#{MIRROR_DIR}/gems") {|filename| 57 | next if /\.gem\z/ !~ filename 58 | available_gems[filename] = true 59 | } 60 | FileUtils.mkpath LATEST_DIR 61 | all_specs = File.open("#{MIRROR_DIR}/specs.4.8") {|f| Marshal.load(f) } 62 | all_specs = all_specs.reject {|name,version,platform| 63 | /\A\./ =~ name || 64 | /\A[0-9a-zA-Z._-]+\z/ !~ name || 65 | /\A[0-9a-zA-Z._-]+\z/ !~ version.to_s || 66 | platform != 'ruby' || 67 | !available_gems["#{name}-#{version}.gem"] 68 | } 69 | 70 | #all_specs = all_specs.reject {|name,version,platform| /\Afoo-/ !~ name } 71 | 72 | latest_vnames = [] 73 | h = all_specs.group_by {|name,version| name } 74 | h.each {|name, list| 75 | list = list.sort_by {|name,version| version } 76 | vnames = list.map {|name,version| "#{name}-#{version}" } 77 | latest_vnames << vnames.pop 78 | } 79 | 80 | already_unpacked = Dir.entries(LATEST_DIR) 81 | already_unpacked = already_unpacked - %w[. ..] 
82 | (already_unpacked - latest_vnames).each {|vname| 83 | puts "remove: #{vname}" 84 | FileUtils.rmtree("#{LATEST_DIR}/#{vname}") 85 | } 86 | 87 | File.open("#{LOG_DIR}/unpack.log.#{Time.now.strftime '%Y%m%dT%H%M%S%z'}", "a") {|log| 88 | (latest_vnames - already_unpacked).each {|vname| 89 | puts "unpack: #{vname}" 90 | system GEM_COMMAND, 'unpack', "#{MIRROR_DIR}/gems/#{vname}.gem", :chdir => LATEST_DIR, :out => log 91 | if !$?.success? 92 | puts "failed to unpack #{vname}" 93 | end 94 | fix_permission("#{LATEST_DIR}/#{vname}") 95 | clean_files("#{LATEST_DIR}/#{vname}") 96 | } 97 | } 98 | 99 | end 100 | 101 | task :index => :index_zoekt 102 | task :index_codesearch => :index_zoekt # for compatibility 103 | 104 | INDEX_COMMAND = 'zoekt-index' 105 | task :index_zoekt do 106 | FileUtils.rm_rf("zoekt-index") 107 | sh INDEX_COMMAND, "-index", "zoekt-index", LATEST_DIR 108 | end 109 | 110 | CSEARCH_COMMAND = 'cindex' 111 | task :index_csearch do 112 | dir = File.join(BASE_DIR, "csearchindexes") 113 | FileUtils.mkpath dir 114 | [nil, *?a..?z].each do |prefix| 115 | env = { "CSEARCHINDEX" => File.join(dir, prefix || '_') } 116 | paths = Dir.glob(File.join(LATEST_DIR, ((prefix || '[ -_]') + "*"))).sort 117 | sh env, CSEARCH_COMMAND, "-reset", *paths 118 | end 119 | end 120 | 121 | task :index_milkode do 122 | # Assume default database for milkode is already created. 123 | # If not, do it as follows: 124 | # milk init --default 125 | milkode_package_list = IO.popen([MILK_COMMAND, 'list']) {|f| f.read } 126 | package_name = File.basename(LATEST_DIR) 127 | if /^#{Regexp.escape package_name}$/ !~ milkode_package_list 128 | sh MILK_COMMAND, 'add', '--verbose', LATEST_DIR 129 | else 130 | sh MILK_COMMAND, 'update', '--verbose', package_name 131 | end 132 | end 133 | 134 | def fix_permission(dir) 135 | return unless File.exist? dir 136 | Find.find(dir) {|fn| 137 | st = File.lstat(fn) 138 | if st.file? 139 | if !st.readable? 
140 | File.chmod(0644, fn) 141 | end 142 | elsif st.directory? 143 | if !st.readable? || !st.executable? 144 | File.chmod(0755, fn) 145 | end 146 | end 147 | } 148 | end 149 | 150 | def clean_files(dir) 151 | return unless File.exist? dir 152 | Find.find(dir) {|fn| 153 | st = File.lstat(fn) 154 | if st.file? 155 | if fn.end_with?('.ri') 156 | File.unlink fn 157 | end 158 | end 159 | } 160 | end 161 | -------------------------------------------------------------------------------- /bin/gem-codesearch-setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'rbconfig' 4 | 5 | RAKE_COMMAND = "#{RbConfig::CONFIG["bindir"]}/rake" 6 | topdir = File.dirname(File.dirname(__FILE__)) 7 | RAKEFILE = topdir + "/Rakefile" 8 | 9 | system RAKE_COMMAND, '-f', RAKEFILE, *ARGV 10 | -------------------------------------------------------------------------------- /gem-codesearch.gemspec: -------------------------------------------------------------------------------- 1 | Gem::Specification.new do |s| 2 | s.name = 'gem-codesearch' 3 | s.version = '0.3' 4 | s.date = '2016-07-29' 5 | s.author = 'Tanaka Akira' 6 | s.email = 'akr@fsij.org' 7 | s.required_ruby_version = '>= 2.2.0' 8 | s.add_runtime_dependency 'rubygems-mirror', '~> 1.0', '>= 1.0.1' 9 | s.add_runtime_dependency 'rake', '~> 10.4', '>= 10.4.2' 10 | s.add_development_dependency 'builder', '~> 3.2', '>= 3.2.2' 11 | s.add_development_dependency 'test-unit', '~> 3.0', '>= 3.0.8' 12 | s.executables << 'gem-codesearch-setup' 13 | s.files = %w[ 14 | .gitignore 15 | LICENSE 16 | README.md 17 | Rakefile 18 | bin/gem-codesearch-setup 19 | gem-codesearch.gemspec 20 | ] 21 | s.test_files = %w[ 22 | test/test_all.rb 23 | test/test_bin.rb 24 | ] 25 | s.homepage = 'https://github.com/akr/gem-codesearch' 26 | s.license = 'MIT' 27 | s.summary = 'Set up a full text code search engine for rubygems mirror' 28 | s.description = <<'End' 29 | gem-codesearch sets up a full text 
code search engine for rubygems mirror. 30 | It use rubygems-mirror to mirror rubygems and 31 | "Code Search" (https://github.com/google/codesearch) for search engine. 32 | End 33 | end 34 | -------------------------------------------------------------------------------- /test/test_all.rb: -------------------------------------------------------------------------------- 1 | require 'test/unit' 2 | require 'tmpdir' 3 | require 'fileutils' 4 | require 'rbconfig' 5 | require 'socket' 6 | require 'pty' 7 | require 'io/console' 8 | 9 | GEM_COMMAND = "#{RbConfig::CONFIG["bindir"]}/gem" 10 | MILK_COMMAND = "#{RbConfig::CONFIG["bindir"]}/milk" 11 | RAKE_COMMAND = "#{RbConfig::CONFIG["bindir"]}/rake" 12 | 13 | topdir = File.dirname(File.dirname(File.realpath(__FILE__))) 14 | RAKEFILE = topdir + "/Rakefile" 15 | 16 | class TestGemCodesearch < Test::Unit::TestCase 17 | def setup 18 | @workdir = Dir.mktmpdir('gem-codesearch') 19 | ENV['MILKODE_DEFAULT_DIR'] = "#{@workdir}/milkode" 20 | end 21 | 22 | def test_milk_init 23 | assert(!File.exist?("#{@workdir}/milkode")) 24 | system(MILK_COMMAND, 'init', '--default', :out => IO::NULL) 25 | assert($?.success?) 26 | assert(File.exist?("#{@workdir}/milkode")) 27 | end 28 | 29 | def start_gem_server 30 | # "gem server" serves gem files already installed. 31 | gem_server_command = [GEM_COMMAND, 'server', '-b', '127.0.0.1', '-p', '0'] 32 | pid = nil 33 | PTY.open {|m, s| 34 | s.raw! 
35 | pid = spawn(*gem_server_command, :out => s, :err => IO::NULL) 36 | line = m.gets 37 | unless %r{Server started at (http:\S+)} =~ line 38 | flunk "unexpected 'gem server' message: #{line.inspect}" 39 | end 40 | url = $1 41 | yield [pid, url] 42 | } 43 | ensure 44 | if pid 45 | Process.kill :TERM, pid 46 | Process.wait pid 47 | end 48 | end 49 | 50 | def test_gem_server 51 | start_gem_server {|pid, url| } 52 | end 53 | 54 | def test_all 55 | start_gem_server {|pid, url| 56 | ENV['GEM_CODESEARCH_URL'] = url 57 | ENV['GEM_CODESEARCH_DIR'] = @workdir 58 | system(MILK_COMMAND, 'init', '--default', :out => IO::NULL) 59 | system(RAKE_COMMAND, '-f', RAKEFILE, 'all', [:out, :err] => IO::NULL) 60 | system(RAKE_COMMAND, '-f', RAKEFILE, 'all', [:out, :err] => IO::NULL) # update existing index. 61 | } 62 | end 63 | 64 | def teardown 65 | FileUtils.rmtree @workdir 66 | ENV['MILKODE_DEFAULT_DIR'] = nil 67 | ENV['GEM_CODESEARCH_URL'] = nil 68 | ENV['GEM_CODESEARCH_DIR'] = nil 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /test/test_bin.rb: -------------------------------------------------------------------------------- 1 | require 'test/unit' 2 | require 'rbconfig' 3 | 4 | RUBY_COMMAND = RbConfig.ruby 5 | 6 | topdir = File.dirname(File.dirname(File.realpath(__FILE__))) 7 | GEM_CODESEARCH_SETUP_COMMAND = topdir + "/bin/gem-codesearch-setup" 8 | 9 | class TestGemCodesearch < Test::Unit::TestCase 10 | def test_setup_help 11 | command = [RUBY_COMMAND, GEM_CODESEARCH_SETUP_COMMAND, 'help'] 12 | message = IO.popen(command) {|io| 13 | io.read 14 | } 15 | assert_match(/rake help/, message) 16 | assert_match(/rake all/, message) 17 | end 18 | end 19 | --------------------------------------------------------------------------------