├── Gemfile ├── lib ├── bosonnlp │ ├── version.rb │ ├── mixin.rb │ └── util.rb └── bosonnlp.rb ├── .gitignore ├── bosonnlp.gemspec ├── examples ├── usage.rb └── usage.rb, └── README.md /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gem "httpclient" 4 | -------------------------------------------------------------------------------- /lib/bosonnlp/version.rb: -------------------------------------------------------------------------------- 1 | class Bosonnlp 2 | VERSION = '0.1.1' 3 | end 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | tags 3 | pkg 4 | coverage 5 | doc 6 | reports 7 | Gemfile.lock 8 | .rbx 9 | .bundle 10 | *.gem 11 | *.swp 12 | -------------------------------------------------------------------------------- /lib/bosonnlp/mixin.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | require 'bosonnlp' 3 | 4 | # Mixin module 5 | module BosonnlpMixin 6 | def method_missing(name, *args) 7 | case name.to_s 8 | when /^(c_|m_|s_)(.+)$/ 9 | @@nlp ||= Bosonnlp.new 10 | data = self 11 | data = [data] if self.class == String 12 | 13 | @@nlp.send name, data, *args 14 | else 15 | super 16 | end 17 | end 18 | end 19 | 20 | # Mixin it 21 | class Array 22 | include BosonnlpMixin 23 | end 24 | 25 | # Mixin it 26 | class String 27 | include BosonnlpMixin 28 | end 29 | -------------------------------------------------------------------------------- /bosonnlp.gemspec: -------------------------------------------------------------------------------- 1 | require 'rake' 2 | 3 | require_relative 'lib/bosonnlp/version' 4 | 5 | Gem::Specification.new do |s| 6 | s.name = 'bosonnlp' 7 | s.version = Bosonnlp::VERSION 8 | s.date = '2014-10-07' 9 | s.authors = ['CC'] 10 | s.email = 
'chcoalc@gmail.com' 11 | s.homepage = 'http://github.com/alal/bosonnlp' 12 | s.platform = Gem::Platform::RUBY 13 | s.summary = 'Bosonnlp Ruby SDK' 14 | s.description = 'bosonnlp.com' 15 | s.files = FileList['lib/*.rb', 'lib/bosonnlp/*.rb'] 16 | s.require_path = 'lib' 17 | s.add_runtime_dependency 'httpclient', '~> 2.5', '>= 2.5.0' 18 | s.license = 'Apache' 19 | s.required_ruby_version = '>= 1.9' 20 | end 21 | -------------------------------------------------------------------------------- /lib/bosonnlp/util.rb: -------------------------------------------------------------------------------- 1 | class Bosonnlp 2 | # 'Utils' 3 | module Util 4 | def get_hash_args(*args) 5 | if Hash === args[-1] 6 | h = args[-1] 7 | else 8 | h = {} 9 | end 10 | h 11 | end 12 | 13 | # :query, :body, :header, :follow_redirect 14 | def _http_request(method, url, *args) 15 | request_args = get_hash_args(*args) 16 | url = get_url url unless url.to_s.start_with?('http://') 17 | request_args[:header] = @head 18 | request_args[:body] = \ 19 | JSON.dump(request_args[:body]) if request_args[:body] 20 | 21 | res = @http_session.request(method, url, request_args) 22 | status = res.status 23 | raise HTTPError, 'HTTPError: %s %s'\ 24 | % [status, (JSON.load(res.content)['message'] rescue res.reason)]\ 25 | if status >= 400 && status < 600 26 | res 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /examples/usage.rb: -------------------------------------------------------------------------------- 1 | require 'bosonnlp' 2 | 3 | # run `export BOSON_API_TOKEN=""` first. 
e.g. 4 | # `export BOSON_API_TOKEN="bmw.1999.eW91ciB0b2tl"` 5 | 6 | # -------------- string and list style ---------------- 7 | p '美好'.c_sentiment 8 | p ['美好', '悲惨'].c_sentiment 9 | 10 | p ['很好吃, 有点贵', '贵了点, 好吃', 11 | '价格贵, 但很好吃', '很好吃, 但很贵'].m_comments 12 | 13 | p ['病毒式媒体网站:让新闻迅速蔓延'].s_keywords 14 | 15 | # ------------------- normal style -------------------- 16 | # You can pass your token like `Bosonnlp.new("")` 17 | nlp = Bosonnlp.new 18 | 19 | p nlp.c_sentiment(['美好','悲惨']) 20 | p nlp.c_ner(['对于该小孩是不是郑尚金的孩子,目前已做亲子鉴定,结果还没出来,',\ 21 | '纪检部门仍在调查之中。成都商报记者 姚永忠']) 22 | p nlp.c_depparser(['我以最快的速度吃了午饭', '先留长头发再剃个秃子']) 23 | p nlp.c_tag(['这个世界好复杂', '计算机是科学么']) 24 | p nlp.c_classify(['俄否决安理会谴责叙军战机空袭阿勒颇平民', 25 | '邓紫棋谈男友林宥嘉:我觉得我比他唱得好', 26 | 'Facebook收购印度初创公司']) 27 | 28 | # ------------ multiple texts API ------------------ 29 | p nlp.m_cluster(['今天天气好', '今天天气好', '今天天气不错', '点点楼头细雨',\ 30 | '重重江外平湖', '当年戏马会东徐', '今日凄凉南浦']) 31 | p nlp.m_comments(['很好吃, 有点贵', '贵了点, 好吃', 32 | '价格贵, 但很好吃', '很好吃, 但很贵']) 33 | 34 | # -- push more than one time (Handle large amount of texts with multiple API) -- 35 | mh = nlp.create_multiple(:comments) 36 | mh.push(['很好吃, 有点贵', '贵了点, 好吃']) 37 | mh.push(['价格贵, 但很好吃', '很好吃, 但很贵']) 38 | mh.analysis # Start computing on the server. 39 | p mh.result # Calling this blocks the client until the result is received from the server. 40 | 41 | mh.push(['很好吃, 有点贵', '贵了点, 好吃']) # Yes, play incrementally! 42 | mh.analysis 43 | p mh.result 44 | 45 | mh.clear # Clear the texts. 
46 | 47 | # ------------------------------------- 48 | #p nlp.s_time(['2013年二月二十八日下午四点三十分二十九秒']) 49 | p nlp.s_keywords(['病毒式媒体网站:让新闻迅速蔓延']) 50 | 51 | # ----------- Handle extra param ------------ 52 | query = { 'top_k' => 3 } 53 | p nlp.s_suggest(['粉丝'], :query => query) 54 | -------------------------------------------------------------------------------- /examples/usage.rb,: -------------------------------------------------------------------------------- 1 | require 'bosonnlp' 2 | 3 | # run `export BOSON_API_TOKEN=""` first. e.g. 4 | # `export BOSON_API_TOKEN="bmw.1999.eW91ciB0b2tl"` 5 | 6 | # -------------- string and list style ---------------- 7 | p '美好'.c_sentiment 8 | p ['美好', '悲惨'].c_sentiment 9 | 10 | p ['很好吃, 有点贵', '贵了点, 好吃', 11 | '价格贵, 但很好吃', '很好吃, 但很贵'].m_comments 12 | 13 | p ['病毒式媒体网站:让新闻迅速蔓延'].s_keywords 14 | 15 | # ------------------- normal style -------------------- 16 | # You can pass your token like `Bosonnlp.new("")` 17 | nlp = Bosonnlp.new 18 | 19 | p nlp.c_sentiment(['美好','悲惨']) 20 | p nlp.c_ner(['对于该小孩是不是郑尚金的孩子,目前已做亲子鉴定,结果还没出来,',\ 21 | '纪检部门仍在调查之中。成都商报记者 姚永忠']) 22 | p nlp.c_depparser(['我以最快的速度吃了午饭', '先留长头发再剃个秃子']) 23 | p nlp.c_tag(['这个世界好复杂', '计算机是科学么']) 24 | p nlp.c_classify(['俄否决安理会谴责叙军战机空袭阿勒颇平民', 25 | '邓紫棋谈男友林宥嘉:我觉得我比他唱得好', 26 | 'Facebook收购印度初创公司']) 27 | 28 | # ------------ multiple texts API ------------------ 29 | p nlp.m_cluster(['今天天气好', '今天天气好', '今天天气不错', '点点楼头细雨',\ 30 | '重重江外平湖', '当年戏马会东徐', '今日凄凉南浦']) 31 | p nlp.m_comments(['很好吃, 有点贵', '贵了点, 好吃', 32 | '价格贵, 但很好吃', '很好吃, 但很贵']) 33 | 34 | # -- push more than one time (Handle large amount of texts with multiple API) -- 35 | mh = nlp.create_multiple(:comments) 36 | mh.push(['很好吃, 有点贵', '贵了点, 好吃']) 37 | mh.push(['价格贵, 但很好吃', '很好吃, 但很贵']) 38 | mh.analysis # Start computing on the server. 39 | p mh.result # Calling this blocks the client until the result is received from the server. 40 | 41 | mh.push(['很好吃, 有点贵', '贵了点, 好吃']) # Yes, play incrementally! 
42 | mh.analysis 43 | p mh.result 44 | 45 | mh.clear # Clear the texts. 46 | 47 | # ------------------------------------- 48 | #p nlp.s_time(['2013年二月二十八日下午四点三十分二十九秒']) 49 | p nlp.s_keywords(['病毒式媒体网站:让新闻迅速蔓延']) 50 | 51 | # ----------- Handle extra param ------------ 52 | query = { 'top_k' => 3 } 53 | p nlp.s_suggest(['粉丝'], :query => query) 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bosonnlp - Boson NLP SDK for Ruby 2 | 3 | ### Install 4 | 5 | gem install bosonnlp 6 | 7 | ### Usage 8 | 9 | ```ruby 10 | # run `export BOSON_API_TOKEN=""` first. 11 | 12 | # ----- string and list style ----- 13 | require 'bosonnlp' 14 | p '美好'.c_sentiment 15 | p ['很好吃, 有点贵', '贵了点, 好吃', '价格贵, 但很好吃', '很好吃'].m_comments 16 | 17 | # ----- normal style (Handle extra param) ----- 18 | nlp = Bosonnlp.new 19 | query = { 'top_k' => 3 } 20 | p nlp.s_suggest(['粉丝'], :query => query) 21 | 22 | # -- push more than one time (Handle large amount of texts with multiple API) -- 23 | mh = nlp.create_multiple(:comments) 24 | mh.push(['很好吃, 有点贵', '贵了点, 好吃']) 25 | mh.push(['价格贵, 但很好吃', '很好吃, 但很贵']) 26 | mh.analysis # Start computing on the server. 27 | p mh.result # Calling this blocks until the result is received from the server. 28 | 29 | mh.push(['很好吃, 有点贵', '贵了点, 好吃']) # Yes, play it incrementally! 30 | mh.analysis 31 | p mh.result 32 | 33 | mh.clear # Clear the texts. 34 | ``` 35 | 36 | You may have noticed the prefixes **'c_','m_','s_'** before API names, e.g. the sentiment API is called with the name 'c_sentiment'. 37 | 38 | There are three types of APIs provided by bosonnlp.com. 39 | 40 | This SDK supports APIs that don't even exist yet. It only needs to know what kind of API it is handling, which is given by the prefix. Its logic does not depend on the API's name. 41 | 42 | - Start with "m_": Multiple-text ones, like the cluster API; it's meaningless to 43 | cluster a single text. 
44 | - Start with "s_": Single ones, limited to one text at a time, which return one 45 | result, e.g. keywords API. 46 | - Start with "c_": Common single ones, just like single ones, but they can accept 47 | multiple texts and return multiple results; those results are not related 48 | to each other, e.g. sentiment API. 49 | 50 | 51 | Check http://docs.bosonnlp.com for API details. 52 | 53 | Check [examples/usage.rb](https://github.com/alal/bosonnlp/blob/master/examples/usage.rb) for more examples. 54 | -------------------------------------------------------------------------------- /lib/bosonnlp.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | require 'json' 4 | require 'httpclient' 5 | require 'securerandom' 6 | 7 | require 'bosonnlp/util' 8 | require 'bosonnlp/mixin' 9 | require 'bosonnlp/version' 10 | 11 | # Main class 12 | class Bosonnlp 13 | include Util 14 | # Operated task not found on server 15 | class TaskNotFoundError < StandardError 16 | end 17 | # Other task error return by server 18 | class TaskError < StandardError 19 | end 20 | # Http error 21 | class HTTPError < RuntimeError 22 | end 23 | # Task time out. 
24 | class TimeoutError < StandardError 25 | end 26 | 27 | # Multiple text engine handler 28 | class MultipleHandler 29 | include Util 30 | def initialize(name, http_session, head, multiple_url) 31 | @name = name 32 | @http_session = http_session 33 | @head = head 34 | @id = SecureRandom.uuid 35 | @multiple_url = multiple_url 36 | @alpha = 0.8 37 | @beta = 0.45 38 | end 39 | 40 | def pre_process(data) 41 | fail ArgumentError, 'Wrong args, data must be a list'\ 42 | ' which contains more than one element.'\ 43 | unless Array === data and data.size > 1 44 | fail ArgumentError, 'Wrong args, data elements must be list of string '\ 45 | 'or {_id, text} Hash.'\ 46 | unless String === data[0] or Hash === data[0] 47 | 48 | processd = [] 49 | if String === data[0] 50 | data.each do |d| 51 | hash_t = {} 52 | hash_t['_id'] = SecureRandom.uuid 53 | hash_t['text'] = d 54 | processd << hash_t 55 | end 56 | else 57 | processd = data 58 | end 59 | 60 | processd 61 | end 62 | 63 | def get_url(method_name) 64 | @multiple_url % [@name, method_name.to_s, @id] 65 | end 66 | 67 | def push(*args) 68 | processd = pre_process args[0] 69 | request_args = get_hash_args(*args) 70 | 71 | is_ok = false 72 | processd.each_slice 100 do |s| 73 | request_args[:body] = s 74 | res = _http_request(:post, __method__, request_args) 75 | is_ok = res.ok? 76 | # TODO, fail for one exception? 77 | end 78 | is_ok 79 | end 80 | 81 | def analysis(*args) 82 | request_args = get_hash_args(*args) 83 | request_args['alpha'] ||= @alpha 84 | request_args['beta'] ||= @beta 85 | res = _http_request(:get, __method__, *args) 86 | res.ok? 87 | end 88 | 89 | def status(*args) 90 | res = _http_request(:get, __method__, *args) 91 | status = JSON.load(res.content)['status'] 92 | case status.to_s.downcase 93 | when 'not found' 94 | raise TaskNotFoundError, "Operated task #{@id} not found on server." 95 | when 'error' 96 | raise TaskError, "Task #{@id} failed on server." 
97 | end 98 | status 99 | end 100 | 101 | def _wait(timeout) 102 | time_elapsed = 0.0 103 | sleep_lenth = 1.0 104 | while true 105 | sleep sleep_lenth 106 | serv_status = status 107 | return if serv_status.downcase == "done" 108 | time_elapsed += sleep_lenth 109 | raise TimeoutError if timeout and time_elapsed > timeout 110 | sleep_lenth *= 1.5 if sleep_lenth < 2**6 111 | end 112 | end 113 | 114 | def result(*args) 115 | _wait get_hash_args(*args)['timeout'] 116 | res = _http_request(:get, __method__, *args) 117 | JSON.load(res.content) 118 | end 119 | 120 | def clear(*args) 121 | res = _http_request(:get, __method__, *args) 122 | res.ok? 123 | end 124 | end 125 | 126 | def initialize(token = nil) 127 | @token = token 128 | @env_token = ENV['BOSON_API_TOKEN'] 129 | proxy = ENV['HTTP_PROXY'] 130 | @http_session = HTTPClient.new(proxy) 131 | 132 | @readable = false 133 | @fail_on_exception = false 134 | @base_url = 'http://api.bosonnlp.com' 135 | @analysis_url = @base_url + '/%s/analysis' 136 | @multiple_url = @base_url + '/%s/%s/%s' 137 | 138 | @token ||= @env_token 139 | fail 'No API token given or found in environment variables, run '\ 140 | '`export BOSON_API_TOKEN=""` in your shell first.'\ 141 | unless @token 142 | 143 | headers = {} 144 | headers['Accept'] = 'application/json' 145 | headers['User-Agent'] = "bosonnlp-ruby #{Bosonnlp::VERSION} "\ 146 | "(httpclient #{HTTPClient::VERSION})" 147 | headers['Content-Type'] = 'application/json' 148 | headers['X-Token'] = @token 149 | @head = headers 150 | 151 | # TODO, args to change default values 152 | end 153 | 154 | def common_api(name, *args) 155 | fail ArgumentError, 'Wrong args, first arg must be a list of string.'\ 156 | unless Array === args[0] and String === args[0][0] 157 | 158 | url = @analysis_url % name 159 | 160 | request_args = args[1] || {} 161 | request_args[:body] = args[0] 162 | 163 | res = _http_request(:post, url, request_args) 164 | JSON.load(res.content) 165 | end 166 | 167 | def 
create_multiple(name) 168 | MultipleHandler.new name.to_s, @http_session, @head, @multiple_url 169 | end 170 | 171 | def multiple_api(name, *args) 172 | begin 173 | mh = create_multiple(name) 174 | mh.push(*args) 175 | mh.analysis(*args) 176 | return mh.result 177 | rescue => e 178 | puts e.message 179 | puts e.backtrace.inspect 180 | ensure 181 | mh.clear(*args) if mh 182 | end 183 | end 184 | 185 | def single_api(name, *args) 186 | fail ArgumentError, 'Wrong args, data must be a list'\ 187 | ' which contains one and only one string element.'\ 188 | unless Array === args[0] and String === args[0][0] and args[0].size == 1 189 | 190 | url = @analysis_url % name 191 | 192 | request_args = get_hash_args(*args) 193 | request_args[:body] = args[0][0] 194 | 195 | res = _http_request(:post, url, request_args) 196 | JSON.load(res.content) 197 | # :query, :body, :header, :follow_redirect 198 | end 199 | 200 | def method_missing(name, *args) 201 | case name.to_s 202 | when /^c_(.+)$/ 203 | common_api($1, *args) 204 | when /^m_(.+)$/ 205 | multiple_api($1, *args) 206 | when /^s_(.+)$/ 207 | single_api($1, *args) 208 | else 209 | super 210 | end 211 | end 212 | end 213 | --------------------------------------------------------------------------------