├── LICENSE ├── config ├── src ├── ngx_http_sphinx2_stream.h ├── ngx_http_sphinx2_args_parser.h ├── ngx_http_sphinx2_stream.c ├── ngx_http_sphinx2_args_parser.c ├── ngx_http_sphinx2_sphx.h ├── ngx_http_sphinx2_module.c └── ngx_http_sphinx2_sphx.c └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config: -------------------------------------------------------------------------------- 1 | ngx_addon_name=ngx_http_sphinx2_module 2 | 3 | HTTP_MODULES="$HTTP_MODULES ngx_http_sphinx2_module" 4 | 5 | NGX_ADDON_DEPS="$NGX_ADDON_DEPS $ngx_addon_dir/src/ngx_http_sphinx2_args_parser.h $ngx_addon_dir/src/ngx_http_sphinx2_stream.h $ngx_addon_dir/src/ngx_http_sphinx2_sphx.h" 6 | 7 | NGX_ADDON_SRCS="$NGX_ADDON_SRCS $ngx_addon_dir/src/ngx_http_sphinx2_args_parser.c $ngx_addon_dir/src/ngx_http_sphinx2_stream.c $ngx_addon_dir/src/ngx_http_sphinx2_sphx.c $ngx_addon_dir/src/ngx_http_sphinx2_module.c" 8 | -------------------------------------------------------------------------------- /src/ngx_http_sphinx2_stream.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Stream abstraction for ngx_buf_t 3 | */ 4 | 5 | #ifndef NGX_HTTP_SPHINX2_STREAM_H 6 | #define NGX_HTTP_SPHINX2_STREAM_H 7 | 8 | /* TYPES */ 9 | 10 | typedef struct sphx2_stream_s sphx2_stream_t; 11 | 12 | /* PROTOTYPES */ 13 | 14 | /* create stream */ 15 | sphx2_stream_t* 16 | sphx2_stream_create(ngx_pool_t*); 17 | 18 | /* allocate buffer (for writes) */ 19 | ngx_int_t 20 | sphx2_stream_alloc(sphx2_stream_t * strm, size_t len); 21 | 22 | /* set given buffer (for reads) */ 23 | ngx_int_t 24 | sphx2_stream_set_buf(sphx2_stream_t * strm, ngx_buf_t * b); 25 | 26 | /* get the buffer */ 27 | ngx_buf_t* 28 | sphx2_stream_get_buf(sphx2_stream_t * strm); 29 | 30 | /* get current offset in stream */ 31 | ngx_uint_t 32 | sphx2_stream_offset(sphx2_stream_t 
* strm); 33 | 34 | /* get max size of stream */ 35 | ngx_uint_t 36 | sphx2_stream_maxsize(sphx2_stream_t * strm); 37 | 38 | /* writes */ 39 | ngx_int_t 40 | sphx2_stream_write_int16(sphx2_stream_t * strm, uint16_t val); 41 | 42 | ngx_int_t 43 | sphx2_stream_write_int32(sphx2_stream_t * strm, uint32_t val); 44 | 45 | ngx_int_t 46 | sphx2_stream_write_int64(sphx2_stream_t * strm, uint64_t val); 47 | 48 | ngx_int_t 49 | sphx2_stream_write_float(sphx2_stream_t * strm, float val); 50 | 51 | ngx_int_t 52 | sphx2_stream_write_string(sphx2_stream_t * strm, ngx_str_t * val); 53 | 54 | /* reads */ 55 | ngx_int_t 56 | sphx2_stream_read_int16(sphx2_stream_t * strm, uint16_t * val); 57 | 58 | ngx_int_t 59 | sphx2_stream_read_int32(sphx2_stream_t * strm, uint32_t * val); 60 | 61 | ngx_int_t 62 | sphx2_stream_read_int64(sphx2_stream_t * strm, uint64_t * val); 63 | 64 | ngx_int_t 65 | sphx2_stream_read_float(sphx2_stream_t * strm, float * val); 66 | 67 | ngx_int_t 68 | sphx2_stream_read_string(sphx2_stream_t * strm, ngx_str_t ** val); 69 | 70 | #endif /* NGX_HTTP_SPHINX2_STREAM_H */ 71 | -------------------------------------------------------------------------------- /src/ngx_http_sphinx2_args_parser.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Search query string parameters parsing 3 | */ 4 | 5 | #ifndef SPHX2_QUERY_STRING_PARAMS_PARSING_H 6 | #define SPHX2_QUERY_STRING_PARAMS_PARSING_H 7 | 8 | 9 | /* TYPES */ 10 | 11 | typedef enum { 12 | SPHX2_ARG_TYPE_NONE = 0, 13 | SPHX2_ARG_TYPE_INTEGER = 0x01, 14 | SPHX2_ARG_TYPE_INTEGER64 = 0x02, 15 | SPHX2_ARG_TYPE_FLOAT = 0x04, 16 | SPHX2_ARG_TYPE_STRING = 0x08, 17 | SPHX2_ARG_TYPE_ENUM = 0x10, 18 | SPHX2_ARG_TYPE_KEYVAL = 0x1000 19 | } sphx2_arg_type_t; 20 | 21 | #define SPHX2_ARG_TYPE_MASK 0x1F 22 | 23 | typedef struct { 24 | sphx2_arg_type_t param_type; 25 | const char ** str_arr; 26 | size_t sz_str_arr; 27 | } sphx2_arg_parse_hint_t; 28 | 29 | typedef struct { 30 | char * input; 31 
| sphx2_arg_parse_hint_t * hints; 32 | const char * delimiter; 33 | char * curr; 34 | size_t num_tokens; 35 | ngx_pool_t * pool; 36 | } sphx2_arg_parse_ctx_t; 37 | 38 | 39 | /* PROTOTYPES */ 40 | 41 | /* register the input, hints and delimiter to create a parsing context */ 42 | ngx_int_t 43 | sphx2_arg_parse_register( 44 | sphx2_arg_parse_ctx_t * ctxt, 45 | ngx_pool_t * pool, 46 | char * input, 47 | sphx2_arg_parse_hint_t * hints, 48 | const char * delimiter); 49 | 50 | /* register current 'token' of a context to another to parse it further */ 51 | ngx_int_t 52 | sphx2_arg_parse_register_child( 53 | sphx2_arg_parse_ctx_t * to, 54 | sphx2_arg_parse_ctx_t * from, 55 | ngx_pool_t * pool, 56 | sphx2_arg_parse_hint_t * hints, 57 | const char * delimiter); 58 | 59 | /* take one tokenizing step, so curr and input pointers are modified */ 60 | ngx_int_t 61 | sphx2_arg_step(sphx2_arg_parse_ctx_t * ctxt); 62 | 63 | /* parse the whole input using the hints and copy the results in the 64 | * order specified by hints array into the target pointer arg assuming 65 | * it is a struct pointer with elements of types specified in hints 66 | * occurring in that order 67 | */ 68 | ngx_int_t 69 | sphx2_arg_parse_whole_using_hints( 70 | sphx2_arg_parse_ctx_t * ctxt, 71 | void * ptr); 72 | 73 | /* get a string arg - internally does a 'step' and also a check using 74 | * the hints if available 75 | */ 76 | ngx_str_t* 77 | sphx2_arg_parse_get_str_arg(sphx2_arg_parse_ctx_t * ctxt); 78 | 79 | /* get an integer arg - internally does a 'step' and also a check using 80 | * the hints if available 81 | */ 82 | uint32_t 83 | sphx2_arg_parse_get_int_arg(sphx2_arg_parse_ctx_t * ctxt); 84 | 85 | /* get an int64 arg - internally does a 'step' and also a check using 86 | * the hints if available 87 | */ 88 | uint64_t 89 | sphx2_arg_parse_get_int64_arg(sphx2_arg_parse_ctx_t * ctxt); 90 | 91 | /* get a float arg - internally does a 'step' and also a check using 92 | * the hints if available 93 | */ 94 | 
float 95 | sphx2_arg_parse_get_float_arg(sphx2_arg_parse_ctx_t * ctxt); 96 | 97 | /* get an enum arg - internally does a 'step' and also a check using 98 | * the hints if available. the matching str-arr if specified overrides 99 | * the hints (if specified). one of the arg here or hint must be 100 | * available. 101 | */ 102 | int32_t 103 | sphx2_arg_parse_get_enum_arg( 104 | sphx2_arg_parse_ctx_t * ctxt, 105 | const char * strarr[], 106 | size_t sz_strarr); 107 | 108 | #endif /* SPHX2_QUERY_STRING_PARAMS_PARSING_H */ 109 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | sphinx2-nginx-module 2 | ==================== 3 | 4 | Nginx upstream module for Sphinx 2.x search daemon 5 | 6 | *This module is not distributed with the Nginx source.* See the installation 7 | instructions. 8 | 9 | Status 10 | 11 | Alpha [Work in progress] 12 | 13 | Version 14 | 15 | 0.2 16 | 17 | Synopsis 18 | 19 | # Search 20 | # /search?offset=0&nres=20&match=all 21 | # &ranker=proxbm25&rankexpr= 22 | # &sort=relevance&sortby=myattr 23 | # &keywords=Anna+Hazare 24 | # &index=myidx 25 | # &filters=a1,in,range,10,20;a2,ex,range,10,20;a3,in,frange,10.00,20.00 26 | # &group=day,attr,@group desc,attr2 27 | # &maxres=1000 28 | # &geo=latattr,lonattr,10.00,10.00 29 | # &idxweights=myidx:50;othidx:10 30 | # &fldweights=content:100;title:0 31 | # &format=json 32 | location /search { 33 | set_unescape_uri $sphinx2_command "search"; 34 | set_unescape_uri $sphx_offset $arg_offset; 35 | set_unescape_uri $sphx_numresults $arg_nres; 36 | set_unescape_uri $sphx_matchmode $arg_match; 37 | set_unescape_uri $sphx_ranker $arg_ranker; 38 | set_unescape_uri $sphx_rankexpr $arg_rankexpr; 39 | set_unescape_uri $sphx_sortmode $arg_sort; 40 | set_unescape_uri $sphx_sortby $arg_sortby; 41 | set_unescape_uri $sphx_keywords $arg_keywords; 42 | set_unescape_uri $sphx_index $arg_index; 43 | 
set_unescape_uri $sphx_filters $arg_filters; 44 | set_unescape_uri $sphx_group $arg_group; 45 | set_unescape_uri $sphx_maxmatches $arg_maxres; 46 | set_unescape_uri $sphx_geo $arg_geo; 47 | set_unescape_uri $sphx_indexweights $arg_idxweights; 48 | set_unescape_uri $sphx_fieldweights $arg_fldweights; 49 | set_unescape_uri $sphx_outputtype $arg_format; 50 | set_unescape_uri $sphx_docs ""; 51 | set_unescape_uri $sphx_excerpt_opts ""; 52 | sphinx2_pass 127.0.0.1:9312; 53 | } 54 | 55 | # Excerpt 56 | # /excerpt?keywords=Anna+Hazare&index=myidx 57 | # &opts=before_match:,after_match:,chunk_separator: ..., 58 | # limit:256,limit_passages:0,limit_words:0,around:5, 59 | # exact_phrase:0,single_passage:0,use_boundaries:0, 60 | # weight_order:0,query_mode:0,force_all_words:0, 61 | # start_passage_id:1,load_files:0,html_strip_mode:index, 62 | # allow_empty:0,passage_boundary:none,emit_zones:0, 63 | # load_files_scattered:0 64 | # &docs= 65 | location /excerpt { 66 | set_unescape_uri $sphinx2_command "excerpt"; 67 | set_unescape_uri $sphx_offset ""; 68 | set_unescape_uri $sphx_numresults ""; 69 | set_unescape_uri $sphx_matchmode ""; 70 | set_unescape_uri $sphx_ranker ""; 71 | set_unescape_uri $sphx_rankexpr ""; 72 | set_unescape_uri $sphx_sortmode ""; 73 | set_unescape_uri $sphx_sortby ""; 74 | set_unescape_uri $sphx_keywords $arg_keywords; 75 | set_unescape_uri $sphx_index $arg_index; 76 | set_unescape_uri $sphx_filters ""; 77 | set_unescape_uri $sphx_group ""; 78 | set_unescape_uri $sphx_maxmatches ""; 79 | set_unescape_uri $sphx_geo ""; 80 | set_unescape_uri $sphx_indexweights ""; 81 | set_unescape_uri $sphx_fieldweights ""; 82 | set_unescape_uri $sphx_outputtype ""; 83 | set_unescape_uri $sphx_docs $arg_docs; 84 | set_unescape_uri $sphx_excerpt_opts $arg_opts; 85 | sphinx2_pass 127.0.0.1:9312; 86 | } 87 | 88 | Description 89 | 90 | This is an Nginx upstream module that makes nginx talk to a Sphinx 91 | (http://sphinxsearch.com/) 2.0.8 server in a non-blocking way.
92 | 93 | The following features are currently supported: 94 | 1 Search 95 | 2 Excerpt 96 | 97 | The module outputs the raw TCP response from searchd minus the 98 | handshake and header bytes. 99 | 100 | Compatibility 101 | 102 | Verified with: 103 | 104 | * nginx-1.4.3 105 | * sphinx-2.0.8 106 | 107 | Limitations 108 | 109 | * Portions of the code are 64-bit specific. 110 | * Portions of the code are little-endian specific. 111 | * Some parameters of the Sphinx searchd request have been given default 112 | values, and the user cannot control them through a query string parameter. The 113 | details are as follows: 114 | 115 | 116 | Installation Instructions 117 | 118 | * Get the nginx source from nginx.org (see "Compatibility" above) 119 | 120 | * Download a release tarball of sphinx2-nginx-module from 121 | https://github.com/reeteshranjan/sphinx2-nginx-module/releases 122 | 123 | * Include the --add-module=/path/to/sphinx2-nginx-module directive in the 124 | configure command for building the nginx source. 125 | 126 | * make && make install (assuming you have permission for the install folders) 127 | 128 | * Use the sample nginx conf file section above to modify your nginx conf.
129 | 130 | Query Parameters Description 131 | 132 | 133 | -------------------------------------------------------------------------------- /src/ngx_http_sphinx2_stream.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx2 request/response buffer stream using ngx_buf_t 3 | */ 4 | 5 | #include <ngx_config.h> 6 | #include <ngx_core.h> 7 | #include <ngx_http.h> 8 | #include <assert.h> 9 | #include "ngx_http_sphinx2_stream.h" 10 | 11 | /* TYPES */ 12 | /* a stream is one ngx_buf_t plus the pool used for its allocations */ 13 | struct sphx2_stream_s { 14 | ngx_pool_t * pool; 15 | ngx_buf_t * b; 16 | }; 17 | 18 | /* FUNCTION DEFINITIONS */ 19 | 20 | /* create a stream bound to 'pool' (no buffer attached yet); returns NULL when the pool allocation fails */ 21 | sphx2_stream_t* 22 | sphx2_stream_create(ngx_pool_t * pool) 23 | { 24 | sphx2_stream_t *s; 25 | 26 | if(NULL == (s = ngx_pcalloc(pool, sizeof(sphx2_stream_t)))) 27 | return(NULL); 28 | 29 | s->pool = pool; 30 | 31 | return(s); 32 | } 33 | 34 | /* attach a fresh temp buffer of 'len' bytes (for writes); NGX_ERROR when it cannot be created */ 35 | ngx_int_t 36 | sphx2_stream_alloc( 37 | sphx2_stream_t * strm, 38 | size_t len) 39 | { 40 | if(NULL == (strm->b = ngx_create_temp_buf(strm->pool, len))) { 41 | return(NGX_ERROR); 42 | } 43 | 44 | return(NGX_OK); 45 | } 46 | 47 | /* attach an existing buffer (for reads); 'b' must not be NULL; always returns NGX_OK */ 48 | ngx_int_t 49 | sphx2_stream_set_buf( 50 | sphx2_stream_t * strm, 51 | ngx_buf_t * b) 52 | { 53 | assert(NULL != b); 54 | strm->b = b; 55 | return (NGX_OK); 56 | } 57 | 58 | /* get the attached buffer (NULL if none has been attached) */ 59 | ngx_buf_t* 60 | sphx2_stream_get_buf(sphx2_stream_t * strm) 61 | { 62 | return(strm->b); 63 | } 64 | 65 | /* number of bytes between the buffer's pos and last pointers */ 66 | ngx_uint_t 67 | sphx2_stream_offset(sphx2_stream_t * strm) 68 | { 69 | assert(NULL != strm->b); 70 | return(strm->b->last - strm->b->pos); 71 | } 72 | 73 | /* total capacity of the buffer (end - start) */ 74 | ngx_uint_t 75 | sphx2_stream_maxsize(sphx2_stream_t * strm) 76 | { 77 | assert(NULL != strm->b); 78 | return(strm->b->end - strm->b->start); 79 | } 80 | 81 | /* writes */ 82 | /* copy sizeof(type) bytes of 'val' to b->last and advance it; makes the enclosing function return NGX_ERROR when fewer bytes remain before b->end */ 83 | #define CHECK_AND_APPEND(strm, type, val) \ 84 | do { \ 85 | if(sizeof(type) > (size_t)(strm->b->end - strm->b->last)) { \ 86
| return (NGX_ERROR); \ 87 | } \ 88 | memcpy(strm->b->last, &val, sizeof(type)); \ 89 | strm->b->last += sizeof(type); \ 90 | } while(0) 91 | /* append a 32-bit big-endian length prefix followed by the raw bytes of 'val' (no NUL); room for prefix and payload is checked up front so a failure never leaves a dangling prefix in the buffer */ 92 | #define CHECK_AND_APPEND_STR(strm, val) \ 93 | do { \ 94 | uint32_t conv = htonl((uint32_t)(val)->len); \ 95 | size_t room = (size_t)((strm)->b->end - (strm)->b->last); \ 96 | if(room < sizeof(uint32_t) || (val)->len > room - sizeof(uint32_t)) { \ 97 | return (NGX_ERROR); \ 98 | } \ 99 | CHECK_AND_APPEND(strm, uint32_t, conv); \ 100 | memcpy((strm)->b->last, (val)->data, (val)->len); (strm)->b->last += (val)->len; \ 101 | } while(0) 102 | /* write a 16-bit value in network byte order; NGX_ERROR when the buffer is full */ 103 | ngx_int_t 104 | sphx2_stream_write_int16( 105 | sphx2_stream_t * strm, 106 | uint16_t val) 107 | { 108 | assert(NULL != strm->b && NULL != strm->b->last); 109 | 110 | val = htons(val); 111 | 112 | CHECK_AND_APPEND(strm, uint16_t, val); 113 | 114 | return(NGX_OK); 115 | } 116 | /* write a 32-bit value in network byte order; NGX_ERROR when the buffer is full */ 117 | ngx_int_t 118 | sphx2_stream_write_int32( 119 | sphx2_stream_t * strm, 120 | uint32_t val) 121 | { 122 | assert(NULL != strm->b && NULL != strm->b->last); 123 | 124 | val = htonl(val); 125 | 126 | CHECK_AND_APPEND(strm, uint32_t, val); 127 | 128 | return(NGX_OK); 129 | } 130 | /* write a 64-bit value as two network-order dwords, high dword first; built from htonl so the result is big-endian on any host (an unconditional byte swap is only right on little-endian machines) */ 131 | ngx_int_t 132 | sphx2_stream_write_int64( 133 | sphx2_stream_t * strm, 134 | uint64_t val) 135 | { 136 | uint32_t hi = htonl((uint32_t)(val >> 32)); uint32_t lo = htonl((uint32_t)val); 137 | assert(NULL != strm->b && NULL != strm->b->last); 138 | /* need room for both halves before writing either */ if(sizeof(uint64_t) > (size_t)(strm->b->end - strm->b->last)) { return(NGX_ERROR); } 139 | CHECK_AND_APPEND(strm, uint32_t, hi); 140 | CHECK_AND_APPEND(strm, uint32_t, lo); 141 | 142 | return(NGX_OK); 143 | } 144 | /* write a float as its 32-bit pattern in network byte order; NGX_ERROR when the buffer is full */ 145 | ngx_int_t 146 | sphx2_stream_write_float( 147 | sphx2_stream_t * strm, 148 | float val) 149 | { 150 | uint32_t conv; 151 | 152 | assert(NULL != strm->b && NULL != strm->b->last); 153 | 154 | /* copy the float's bits into a dword with memcpy; dereferencing (uint32_t*)&val would violate strict aliasing */ 155 | memcpy(&conv, &val, sizeof(conv)); conv = htonl(conv); 156 | 157 | CHECK_AND_APPEND(strm, uint32_t, conv); 158 | 159 | return(NGX_OK); 160 | } 161 | /* write a string as 32-bit length + bytes; nothing is written when it does not fit */ 162 | ngx_int_t 163 | sphx2_stream_write_string( 164 | sphx2_stream_t * strm, 165 | ngx_str_t * val) 166 | { 167 | assert(NULL != strm->b && NULL != strm->b->last); 168 | 169 | CHECK_AND_APPEND_STR(strm, val); /* len + str (no null char) */ 170 | 171 | return(NGX_OK);
172 | } 173 | 174 | /* reads */ 175 | /* copy sizeof(type) bytes from b->pos into 'val' (a pointer) and advance pos; makes the enclosing function return NGX_ERROR when fewer bytes remain before b->last */ 176 | #define CHECK_AND_READ(strm, type, val) \ 177 | do { \ 178 | if(sizeof(type) > (size_t)(strm->b->last - strm->b->pos)) { \ 179 | return (NGX_ERROR); \ 180 | } \ 181 | memcpy(val, strm->b->pos, sizeof(type)); \ 182 | strm->b->pos += sizeof(type); \ 183 | } while(0) 184 | /* read a 32-bit big-endian length, then that many bytes into a pool-allocated, NUL-terminated copy. The length goes through a uint32_t temporary: (val)->len is wider than 4 bytes on 64-bit hosts, so copying the dword straight into it leaves the other bytes unset and lands in the wrong half on big-endian machines. */ 185 | #define CHECK_AND_READ_STR(strm, val) \ 186 | do { \ 187 | uint32_t wire_len; \ 188 | CHECK_AND_READ(strm, uint32_t, &wire_len); (val)->len = ntohl(wire_len); \ 189 | if((val)->len > (size_t)(strm->b->last - strm->b->pos)) { \ 190 | return (NGX_ERROR); \ 191 | } \ 192 | if(NULL == ((val)->data = ngx_palloc(strm->pool, (val)->len+1))) { \ 193 | return (NGX_ERROR); \ 194 | } \ 195 | memcpy((val)->data, strm->b->pos, (val)->len); \ 196 | (val)->data[(val)->len] = 0; \ 197 | strm->b->pos += (val)->len; \ 198 | } while(0) 199 | /* read a 16-bit network-order value into *val; NGX_ERROR when too few bytes remain */ 200 | ngx_int_t 201 | sphx2_stream_read_int16( 202 | sphx2_stream_t * strm, 203 | uint16_t * val) 204 | { 205 | assert(NULL != strm->b && NULL != strm->b->pos); 206 | 207 | CHECK_AND_READ(strm, uint16_t, val); 208 | 209 | *val = ntohs(*val); 210 | 211 | return(NGX_OK); 212 | } 213 | /* read a 32-bit network-order value into *val; NGX_ERROR when too few bytes remain */ 214 | ngx_int_t 215 | sphx2_stream_read_int32( 216 | sphx2_stream_t * strm, 217 | uint32_t * val) 218 | { 219 | assert(NULL != strm->b && NULL != strm->b->pos); 220 | 221 | CHECK_AND_READ(strm, uint32_t, val); 222 | 223 | *val = ntohl(*val); 224 | 225 | return(NGX_OK); 226 | } 227 | /* read a 64-bit value stored as two network-order dwords, high dword first; assembled with ntohl so it is correct on any host (an unconditional byte swap is only right on little-endian machines) */ 228 | ngx_int_t 229 | sphx2_stream_read_int64( 230 | sphx2_stream_t * strm, 231 | uint64_t * val) 232 | { 233 | uint32_t hi, lo; assert(NULL != strm->b && NULL != strm->b->pos); 234 | /* consume nothing unless all eight bytes are present */ if(sizeof(uint64_t) > (size_t)(strm->b->last - strm->b->pos)) { return(NGX_ERROR); } 235 | CHECK_AND_READ(strm, uint32_t, &hi); CHECK_AND_READ(strm, uint32_t, &lo); 236 | 237 | *val = ((uint64_t)ntohl(hi) << 32) | (uint64_t)ntohl(lo); 238 | 239 | return(NGX_OK); 240 | } 241 | /* read a float stored as a 32-bit network-order bit pattern; NGX_ERROR when too few bytes remain */ 242 | ngx_int_t 243 | sphx2_stream_read_float( 244 | sphx2_stream_t * strm, 245 | float * val) 246 | { 247 | uint32_t v; 248 | 249 | assert(NULL != strm->b && NULL != strm->b->pos); 250 | 251 | CHECK_AND_READ(strm, uint32_t, &v); 252 | 253 | v = ntohl(v); 254 | 255 | memcpy(val, &v, sizeof(uint32_t));
256 | 257 | return(NGX_OK); 258 | } 259 | /* read a length-prefixed string into a newly pool-allocated ngx_str_t stored in *val; NGX_ERROR on allocation failure or when the buffer holds too few bytes (*val has already been assigned in the latter case) */ 260 | ngx_int_t 261 | sphx2_stream_read_string( 262 | sphx2_stream_t * strm, 263 | ngx_str_t ** val) 264 | { 265 | assert(NULL != strm->b && NULL != strm->b->pos); 266 | 267 | if(NULL == (*val = ngx_palloc(strm->pool, sizeof(ngx_str_t)))) { 268 | return(NGX_ERROR); 269 | } 270 | 271 | CHECK_AND_READ_STR(strm, *val); 272 | 273 | return(NGX_OK); 274 | } 275 | -------------------------------------------------------------------------------- /src/ngx_http_sphinx2_args_parser.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx upstream module URI query params parsing 3 | */ 4 | 5 | #include <ngx_config.h> 6 | #include <ngx_core.h> 7 | #include <ngx_http.h> 8 | #include <assert.h> 9 | #include "ngx_http_sphinx2_args_parser.h" 10 | 11 | /* FUNCTION DEFINITIONS */ 12 | 13 | /* initialise a parsing ctx over 'input' with the given hints and delimiter; no token is current yet (curr = NULL, num_tokens = 0); always returns NGX_OK */ 14 | ngx_int_t 15 | sphx2_arg_parse_register( 16 | sphx2_arg_parse_ctx_t * ctxt, 17 | ngx_pool_t * pool, 18 | char * input, 19 | sphx2_arg_parse_hint_t * hints, 20 | const char * delimiter) 21 | { 22 | ctxt->input = input; 23 | ctxt->hints = hints; 24 | ctxt->delimiter = delimiter; 25 | ctxt->curr = NULL; 26 | ctxt->num_tokens = 0; 27 | ctxt->pool = pool; 28 | 29 | return NGX_OK; 30 | } 31 | 32 | /* initialise ctx 'to' so that it parses the current token of 'from'; 'from' must already have a current token */ 33 | ngx_int_t 34 | sphx2_arg_parse_register_child( 35 | sphx2_arg_parse_ctx_t * to, 36 | sphx2_arg_parse_ctx_t * from, 37 | ngx_pool_t * pool, 38 | sphx2_arg_parse_hint_t * hints, 39 | const char * delimiter) 40 | { 41 | assert(NULL != from->curr); 42 | 43 | return sphx2_arg_parse_register(to, pool, from->curr, hints, delimiter); 44 | } 45 | 46 | /* take one tokenizing step, so curr and input pointers are modified; NGX_ERROR when the input is NULL or exhausted */ 47 | ngx_int_t 48 | sphx2_arg_step(sphx2_arg_parse_ctx_t * ctxt) 49 | { 50 | #define SPHX2_ARG_KEYVAL_DELIM ':' 51 | 52 | if(NULL == ctxt->input || 0 == *(ctxt->input)) { 53 | return NGX_ERROR; 54 | } 55 | 56 | if(NULL
== (ctxt->curr = strsep(&ctxt->input, ctxt->delimiter))) { 57 | return NGX_ERROR; 58 | } 59 | 60 | /* in case the token is a key-val pair then we need to further token parse*/ 61 | if(ctxt->hints && 62 | ctxt->hints[ctxt->num_tokens].param_type & SPHX2_ARG_TYPE_KEYVAL) 63 | { 64 | if(NULL == (ctxt->curr = strchr(ctxt->curr, SPHX2_ARG_KEYVAL_DELIM))) 65 | return NGX_ERROR; 66 | 67 | ctxt->curr = ctxt->curr + 1; /* move to value part */ 68 | } 69 | 70 | ++ctxt->num_tokens; 71 | 72 | return NGX_OK; 73 | } 74 | 75 | /* get a string arg - internally does a 'step' and also a check using 76 | * the hints if available 77 | */ 78 | ngx_str_t* 79 | sphx2_arg_parse_get_str_arg(sphx2_arg_parse_ctx_t * ctxt) 80 | { 81 | ngx_str_t * str; 82 | 83 | if(NULL != ctxt->hints && 84 | SPHX2_ARG_TYPE_STRING != 85 | ctxt->hints[ctxt->num_tokens].param_type) 86 | { 87 | return(NULL); 88 | } 89 | 90 | if(NGX_ERROR == sphx2_arg_step(ctxt)) 91 | return (NULL); 92 | 93 | if(NULL == (str = ngx_palloc(ctxt->pool, sizeof(ngx_str_t)))) { 94 | return(NULL); 95 | } 96 | 97 | str->len = strlen(ctxt->curr)+1; 98 | 99 | if(NULL == (str->data = ngx_palloc(ctxt->pool, str->len))) { 100 | return(NULL); 101 | } 102 | 103 | memcpy(str->data, ctxt->curr, str->len); 104 | 105 | return(str); 106 | } 107 | 108 | /* get an integer arg - internally does a 'step' and also a check using 109 | * the hints if available 110 | */ 111 | uint32_t 112 | sphx2_arg_parse_get_int_arg(sphx2_arg_parse_ctx_t * ctxt) 113 | { 114 | if(NULL != ctxt->hints && 115 | SPHX2_ARG_TYPE_INTEGER != 116 | ctxt->hints[ctxt->num_tokens].param_type) 117 | { 118 | return(NGX_ERROR); 119 | } 120 | 121 | if(NGX_ERROR == sphx2_arg_step(ctxt)) 122 | return (NGX_ERROR); 123 | 124 | return(atoi(ctxt->curr)); 125 | } 126 | 127 | /* get an int64 arg - internally does a 'step' and also a check using 128 | * the hints if available 129 | */ 130 | uint64_t 131 | sphx2_arg_parse_get_int64_arg(sphx2_arg_parse_ctx_t * ctxt) 132 | { 133 | if(NULL != 
ctxt->hints && 134 | SPHX2_ARG_TYPE_INTEGER64 != 135 | (ctxt->hints[ctxt->num_tokens].param_type & SPHX2_ARG_TYPE_MASK)) /* KEYVAL flag masked off before comparing */ 136 | { 137 | return(NGX_ERROR); 138 | } 139 | 140 | if(NGX_ERROR == sphx2_arg_step(ctxt)) 141 | return (NGX_ERROR); 142 | /* strtoull: the return type is unsigned, and strtoll would clamp anything above LLONG_MAX */ 143 | return((uint64_t)strtoull(ctxt->curr, NULL, 10)); 144 | } 145 | 146 | /* get a float arg - internally does a 'step' and also a check using 147 | * the hints if available (KEYVAL flag masked off). returns (float)NGX_ERROR on type mismatch or exhausted input 148 | */ 149 | float 150 | sphx2_arg_parse_get_float_arg(sphx2_arg_parse_ctx_t * ctxt) 151 | { 152 | if(NULL != ctxt->hints && 153 | SPHX2_ARG_TYPE_FLOAT != 154 | (ctxt->hints[ctxt->num_tokens].param_type & SPHX2_ARG_TYPE_MASK)) 155 | { 156 | return((float)NGX_ERROR); 157 | } 158 | 159 | if(NGX_ERROR == sphx2_arg_step(ctxt)) 160 | return ((float)NGX_ERROR); 161 | 162 | return((float)strtod(ctxt->curr, NULL)); 163 | } 164 | 165 | /* get an enum arg - internally does a 'step' and also a check using 166 | * the hints if available. the matching str-arr if specified overrides 167 | * the hints (if specified). one of the arg here or hint must be 168 | * available.
169 | */ 170 | int32_t 171 | sphx2_arg_parse_get_enum_arg( 172 | sphx2_arg_parse_ctx_t * ctxt, 173 | const char * str_arr[], 174 | size_t sz_str_arr) 175 | { 176 | size_t i; 177 | /* the KEYVAL flag is masked off before the hint type is compared */ 178 | if(NULL != ctxt->hints && 179 | SPHX2_ARG_TYPE_ENUM != 180 | (ctxt->hints[ctxt->num_tokens].param_type & SPHX2_ARG_TYPE_MASK)) 181 | { 182 | return(NGX_ERROR); 183 | } 184 | 185 | assert(!(NULL == str_arr && NULL == ctxt->hints)); 186 | /* fall back to the names registered in the hint; read before the step advances num_tokens */ 187 | if(NULL == str_arr) { 188 | str_arr = ctxt->hints[ctxt->num_tokens].str_arr; 189 | sz_str_arr = ctxt->hints[ctxt->num_tokens].sz_str_arr; 190 | } 191 | 192 | if(NGX_ERROR == sphx2_arg_step(ctxt)) 193 | return (NGX_ERROR); 194 | /* an exact match wins, so a name that is a prefix of a later name cannot shadow it */ for(i = 0; i < sz_str_arr; ++i) { if(!strcmp(ctxt->curr, str_arr[i])) { return (i); } } /* otherwise keep the historical behaviour: accept a token that starts with a known name */ 195 | for(i = 0; i < sz_str_arr; ++i) { 196 | if(!strncmp(ctxt->curr, str_arr[i], strlen(str_arr[i]))) { 197 | return (i); 198 | } 199 | } 200 | 201 | return(NGX_ERROR); 202 | } 203 | 204 | /* parse the whole input using the hints and copy the results in the 205 | * order specified by hints array into the target pointer arg assuming 206 | * it is a struct pointer with elements of types specified in hints 207 | * occurring in that order. returns NGX_ERROR as soon as one getter fails 208 | */ 209 | ngx_int_t 210 | sphx2_arg_parse_whole_using_hints( 211 | sphx2_arg_parse_ctx_t * ctxt, 212 | void * ptr) 213 | { /* 'a' is 1 while p sits on an 8-byte step and 0 when it is 4 bytes past one; pointer math is done on char* rather than through an integer cast. assumes 'ptr' starts 8-byte aligned and the target struct uses natural alignment - TODO confirm against callers */ 214 | #define APPEND_4_BYTES(p, v, a) \ 215 | do { \ 216 | memcpy(p, v, sizeof(uint32_t)); \ 217 | p = (void*)((char*)(p) + sizeof(uint32_t)); \ 218 | a = 1 - a; \ 219 | } while(0) 220 | /* skips 4 padding bytes first when the previous field left p 4 bytes past an 8-byte step */ 221 | #define APPEND_8_BYTES(p, v, a) \ 222 | do { \ 223 | if(0==a) { p = (void*)((char*)(p) + sizeof(uint32_t)); a = 1;}; \ 224 | memcpy(p, v, sizeof(uint64_t)); \ 225 | p = (void*)((char*)(p) + sizeof(uint64_t)); \ 226 | } while(0) 227 | 228 | size_t i = 0; 229 | int32_t is_ptr_aligned = 1; 230 | union { uint32_t i; uint64_t i64; float f; ngx_str_t* s; int32_t e; } v; 231 | 232 | assert(NULL != ctxt->hints); 233 | /* the hints array is terminated by an entry of type SPHX2_ARG_TYPE_NONE */ 234 | while(SPHX2_ARG_TYPE_NONE != ctxt->hints[i].param_type) { 235 | 236 | switch(ctxt->hints[i].param_type & SPHX2_ARG_TYPE_MASK) { 237 | 238 | case SPHX2_ARG_TYPE_INTEGER: 239 |
if((uint32_t)NGX_ERROR == (v.i = sphx2_arg_parse_get_int_arg(ctxt))) 240 | { 241 | return(NGX_ERROR); 242 | } 243 | APPEND_4_BYTES(ptr, &v.i, is_ptr_aligned); 244 | break; 245 | case SPHX2_ARG_TYPE_INTEGER64: 246 | if((uint64_t)NGX_ERROR == (v.i64 = sphx2_arg_parse_get_int64_arg(ctxt))) 247 | { 248 | return(NGX_ERROR); 249 | } 250 | APPEND_8_BYTES(ptr, &v.i64, is_ptr_aligned); 251 | break; 252 | case SPHX2_ARG_TYPE_FLOAT: 253 | if((float)NGX_ERROR == (v.f = sphx2_arg_parse_get_float_arg(ctxt))) 254 | { 255 | return(NGX_ERROR); 256 | } 257 | APPEND_4_BYTES(ptr, &v.i, is_ptr_aligned); 258 | break; 259 | case SPHX2_ARG_TYPE_STRING: 260 | if(NULL == (v.s = sphx2_arg_parse_get_str_arg(ctxt))) 261 | { 262 | return(NGX_ERROR); 263 | } 264 | APPEND_8_BYTES(ptr, &v.s, is_ptr_aligned); 265 | break; 266 | case SPHX2_ARG_TYPE_ENUM: 267 | if((int32_t)NGX_ERROR == (v.e = sphx2_arg_parse_get_enum_arg(ctxt, 268 | ctxt->hints[i].str_arr, ctxt->hints[i].sz_str_arr))) 269 | { 270 | return(NGX_ERROR); 271 | } 272 | APPEND_4_BYTES(ptr, &v.e, is_ptr_aligned); 273 | break; 274 | default: 275 | assert(0); 276 | } 277 | 278 | ++i; 279 | } 280 | 281 | return (NGX_OK); 282 | } 283 | -------------------------------------------------------------------------------- /src/ngx_http_sphinx2_sphx.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx2 types to work with search daemon 3 | */ 4 | 5 | #ifndef NGX_HTTP_SPHINX2_SPHX_H 6 | #define NGX_HTTP_SPHINX2_SPHX_H 7 | 8 | 9 | /* TYPES */ 10 | 11 | #define SPHX2_CLI_VERSION 1 12 | #define SPHX2_SEARCHD_PROTO 1 13 | 14 | /* Codes of commands to be sent to Sphinx search daemon */ 15 | typedef enum { 16 | SPHX2_COMMAND_NONE = -1, 17 | SPHX2_COMMAND_SEARCH = 0, 18 | SPHX2_COMMAND_EXCERPT = 1, 19 | #if 0 20 | -- not supported as of this release -- 21 | 22 | SPHX2_COMMAND_UPDATE = 2, 23 | SPHX2_COMMAND_KEYWORDS = 3, 24 | SPHX2_COMMAND_PERSIST = 4, 25 | SPHX2_COMMAND_STATUS = 5, 26 | SPHX2_COMMAND_FLUSHATTRS 
= 7 27 | #endif 28 | SPHX2_COMMAND_COUNT 29 | } sphx2_command_t; 30 | 31 | /* Versions of commands to be sent to Sphinx search daemon */ 32 | typedef enum { 33 | SPHX2_VER_COMMAND_SEARCH = 0x119, 34 | SPHX2_VER_COMMAND_EXCERPT = 0x104 35 | #if 0 36 | -- not supported as of this release -- 37 | SPHX2_VER_COMMAND_UPDATE = 0x102, 38 | SPHX2_VER_COMMAND_KEYWORDS = 0x100, 39 | SPHX2_VER_COMMAND_STATUS = 0x100, 40 | SPHX2_VER_COMMAND_QUERY = 0x100, 41 | SPHX2_VER_COMMAND_FLUSHATTRS = 0x100 42 | #endif 43 | } sphx2_version_no_t; 44 | 45 | /* Return codes from Sphinx search daemon */ 46 | typedef enum { 47 | SPHX2_STATUS_OK = 0, 48 | SPHX2_STATUS_ERROR = 1, 49 | SPHX2_STATUS_RETRY = 2, 50 | SPHX2_STATUS_WARNING = 3 51 | } sphx2_status_t; 52 | 53 | /* Match mode to use while searching */ 54 | typedef enum { 55 | SPHX2_MATCH_ALL = 0, 56 | SPHX2_MATCH_ANY = 1, 57 | SPHX2_MATCH_PHRASE = 2, 58 | SPHX2_MATCH_BOOLEAN = 3, 59 | SPHX2_MATCH_EXTENDED = 4, 60 | SPHX2_MATCH_FULLSCAN = 5, 61 | SPHX2_MATCH_EXTENDED2 = 6 62 | } sphx2_match_mode_t; 63 | 64 | /* Ranker to use while searching */ 65 | typedef enum { 66 | SPHX2_RANK_PROXIMITY_BM25 = 0, 67 | SPHX2_RANK_BM25 = 1, 68 | SPHX2_RANK_NONE = 2, 69 | SPHX2_RANK_WORDCOUNT = 3, 70 | SPHX2_RANK_PROXIMITY = 4, 71 | SPHX2_RANK_MATCHANY = 5, 72 | SPHX2_RANK_FIELDMASK = 6, 73 | SPHX2_RANK_SPH04 = 7, 74 | SPHX2_RANK_EXPR = 8, 75 | SPHX2_RANK_TOTAL = 9 76 | } sphx2_ranker_t; 77 | 78 | /* Mode of sorting results obtained */ 79 | typedef enum { 80 | SPHX2_SORT_RELEVANCE = 0, 81 | SPHX2_SORT_ATTR_DESC = 1, 82 | SPHX2_SORT_ATTR_ASC = 2, 83 | SPHX2_SORT_TIME_SEGMENTS = 3, 84 | SPHX2_SORT_EXTENDED = 4, 85 | SPHX2_SORT_EXPR = 5 86 | } sphx2_sort_mode_t; 87 | 88 | /* Filter type */ 89 | typedef enum { 90 | SPHX2_FILTER_VALUES = 0, 91 | SPHX2_FILTER_RANGE = 1, 92 | SPHX2_FILTER_FLOATRANGE = 2, 93 | } sphx2_filter_type_t; 94 | 95 | /* Group by type */ 96 | typedef enum { 97 | SPHX2_GROUPBY_DAY = 0, 98 | SPHX2_GROUPBY_WEEK = 1, 99 | SPHX2_GROUPBY_MONTH = 
2, 100 | SPHX2_GROUPBY_YEAR = 3, 101 | SPHX2_GROUPBY_ATTR = 4, 102 | SPHX2_GROUPBY_ATTRPAIR = 5, 103 | } sphx2_group_type_t; 104 | 105 | /* Output format type */ 106 | typedef enum { 107 | SPHX2_OUTPUT_RAW = 0, 108 | SPHX2_OUTPUT_TEXT = 1, 109 | SPHX2_OUTPUT_JSON = 2, 110 | SPHX2_OUTPUT_XML = 3, 111 | } sphx2_output_type_t; 112 | 113 | /* Search daemon response status */ 114 | typedef enum { 115 | SPHX2_SEARCHD_OK = 0, 116 | SPHX2_SEARCHD_ERROR = 1, 117 | SPHX2_SEARCHD_RETRY = 2, 118 | SPHX2_SEARCHD_WARNING = 3, 119 | } sphx2_searchd_status_t; 120 | 121 | /* Specify weight of a field */ 122 | typedef struct _sphx2_weight sphx2_weight_t; 123 | 124 | struct _sphx2_weight { 125 | ngx_str_t * entity; 126 | uint32_t weight; 127 | sphx2_weight_t * next; 128 | }; 129 | 130 | /* Filter values (int64) 131 | typedef struct _sphx2_filter_value sphx2_filter_value_t; 132 | 133 | struct _sphx2_filter_value { 134 | uint64_t value; 135 | sphx2_filter_value_t * next; 136 | };*/ 137 | 138 | /* Filter range (int64) */ 139 | typedef struct { 140 | uint64_t min; 141 | uint64_t max; 142 | } sphx2_filter_int_range_t; 143 | 144 | /* Filter range (float) */ 145 | typedef struct { 146 | double min; 147 | double max; 148 | } sphx2_filter_float_range_t; 149 | 150 | /* Filter spec */ 151 | typedef union { 152 | /*struct { 153 | sphx2_filter_value_t * v; 154 | ngx_uint_t n; 155 | } vals;*/ 156 | sphx2_filter_int_range_t ir; 157 | sphx2_filter_float_range_t fr; 158 | } sphx2_filter_spec_t; 159 | 160 | /* Filter */ 161 | typedef struct _sphx2_filter sphx2_filter_t; 162 | 163 | struct _sphx2_filter { 164 | ngx_str_t * attr; 165 | int32_t exclude; 166 | sphx2_filter_type_t type; 167 | sphx2_filter_spec_t spec; 168 | sphx2_filter_t * next; 169 | }; 170 | 171 | /* Grouping */ 172 | typedef struct { 173 | sphx2_group_type_t type; 174 | ngx_str_t * attr; 175 | ngx_str_t * sort; 176 | ngx_str_t * distinct; 177 | } sphx2_group_t; 178 | 179 | /* Geo */ 180 | typedef struct { 181 | ngx_str_t * lat_attr; 
182 | ngx_str_t * lon_attr; 183 | float lat; 184 | float lon; 185 | } sphx2_geo_t; 186 | 187 | /* Input to search query */ 188 | typedef struct { 189 | uint32_t offset; 190 | uint32_t num_results; 191 | sphx2_match_mode_t match_mode; 192 | sphx2_ranker_t ranker; 193 | ngx_str_t * rank_expr; 194 | sphx2_sort_mode_t sort_mode; 195 | ngx_str_t * sort_by; 196 | ngx_str_t * keywords; 197 | ngx_str_t * index; 198 | uint32_t num_filters; 199 | sphx2_filter_t * filters; 200 | sphx2_group_t * group; 201 | uint32_t max_matches; 202 | sphx2_geo_t * geo; 203 | uint32_t num_index_weights; 204 | sphx2_weight_t * index_weights; 205 | uint32_t num_field_weights; 206 | sphx2_weight_t * field_weights; 207 | sphx2_output_type_t output_type; 208 | } sphx2_search_input_t; 209 | 210 | /* A document */ 211 | typedef struct sphx2_doc_s sphx2_doc_t; 212 | 213 | struct sphx2_doc_s { 214 | ngx_str_t * doc; 215 | sphx2_doc_t * next; 216 | }; 217 | 218 | /* Excerpt opts */ 219 | typedef struct { 220 | ngx_str_t * before_match; 221 | ngx_str_t * after_match; 222 | ngx_str_t * chunk_separator; 223 | uint32_t limit; 224 | uint32_t limit_passages; 225 | uint32_t limit_words; 226 | uint32_t around; 227 | uint32_t exact_phrase; 228 | uint32_t single_passage; 229 | uint32_t use_boundaries; 230 | uint32_t weight_order; 231 | uint32_t query_mode; 232 | uint32_t force_all_words; 233 | uint32_t start_passage_id; 234 | uint32_t load_files; 235 | ngx_str_t * html_strip_mode; 236 | uint32_t allow_empty; 237 | ngx_str_t * passage_boundary; 238 | uint32_t emit_zones; 239 | uint32_t load_files_scattered; 240 | uint32_t opts_flag; 241 | } sphx2_excerpt_opts_t; 242 | 243 | /* Input to excerpt command */ 244 | typedef struct { 245 | ngx_str_t * keywords; 246 | ngx_str_t * index; 247 | uint32_t num_docs; 248 | sphx2_doc_t * docs; 249 | sphx2_excerpt_opts_t * excerpt_opts; 250 | } sphx2_excerpt_input_t; 251 | 252 | /* Input - union */ 253 | typedef union { 254 | sphx2_search_input_t srch; 255 | 
sphx2_excerpt_input_t exrp; 256 | } sphx2_input_t; 257 | 258 | /* Search response context */ 259 | typedef struct { 260 | uint32_t len; 261 | } sphx2_search_response_ctx_t; 262 | 263 | /* Excerpt command response */ 264 | typedef struct { 265 | uint32_t len; 266 | } sphx2_excerpt_response_ctx_t; 267 | 268 | /* Response context */ 269 | typedef union { 270 | sphx2_search_response_ctx_t srch; 271 | sphx2_excerpt_response_ctx_t exrp; 272 | } sphx2_response_ctx_t; 273 | 274 | 275 | /* FUNCTION PROTOTYPES */ 276 | 277 | /* Parse URL arguments */ 278 | ngx_int_t 279 | sphx2_parse_match_mode_str(ngx_pool_t*, ngx_str_t*, sphx2_match_mode_t*); 280 | 281 | ngx_int_t 282 | sphx2_parse_ranker_str(ngx_pool_t*, ngx_str_t*, sphx2_ranker_t*); 283 | 284 | ngx_int_t 285 | sphx2_parse_sort_mode_str(ngx_pool_t*, ngx_str_t*, sphx2_sort_mode_t*); 286 | 287 | ngx_int_t 288 | sphx2_parse_weights_str(ngx_pool_t*, ngx_str_t*, sphx2_weight_t**, uint32_t*); 289 | 290 | #define sphx2_parse_index_weights_str sphx2_parse_weights_str 291 | #define sphx2_parse_field_weights_str sphx2_parse_weights_str 292 | 293 | ngx_int_t 294 | sphx2_parse_filters_str(ngx_pool_t*, ngx_str_t*, sphx2_filter_t**, uint32_t*); 295 | 296 | ngx_int_t 297 | sphx2_parse_group_str(ngx_pool_t*, ngx_str_t*, sphx2_group_t**); 298 | 299 | ngx_int_t 300 | sphx2_parse_output_type_str(ngx_pool_t*, ngx_str_t*, sphx2_output_type_t*); 301 | 302 | ngx_int_t 303 | sphx2_parse_geo_str(ngx_pool_t*, ngx_str_t*, sphx2_geo_t**); 304 | 305 | ngx_int_t 306 | sphx2_parse_docs_str(ngx_pool_t*, ngx_str_t*, sphx2_doc_t**, uint32_t*); 307 | 308 | ngx_int_t 309 | sphx2_parse_excerpt_opts_str(ngx_pool_t*, ngx_str_t*, sphx2_excerpt_opts_t**); 310 | 311 | void 312 | sphx2_create_opts_flag(sphx2_excerpt_opts_t*); 313 | 314 | 315 | /* Requests & Responses */ 316 | ngx_int_t 317 | sphx2_create_search_request(ngx_pool_t*, sphx2_search_input_t*, ngx_buf_t**); 318 | 319 | ngx_int_t 320 | sphx2_parse_search_response_header(ngx_pool_t*, ngx_buf_t*, 321 | 
/*
 * Identifiers of the request arguments the module reads from nginx variables.
 *
 * The values double as indexes into ngx_http_sphinx2_args[] (the variable
 * names) and into ngx_http_sphinx2_loc_conf_t.arg_idx[] (the resolved
 * variable indexes), so the order here must stay in step with the name table.
 * SPHX2_ARG_COUNT is not an argument: it is the number of entries and sizes
 * the arg_idx[] array.
 */
typedef enum {
    SPHX2_ARG_OFFSET = 0,
    SPHX2_ARG_NUM_RESULTS,
    SPHX2_ARG_MATCH_MODE,
    SPHX2_ARG_RANKER,
    SPHX2_ARG_RANK_EXPR,
    SPHX2_ARG_SORT_MODE,
    SPHX2_ARG_SORT_BY,
    SPHX2_ARG_KEYWORDS,
    SPHX2_ARG_INDEX,
    SPHX2_ARG_FILTERS,
    SPHX2_ARG_GROUP,
    SPHX2_ARG_MAX_MATCHES,
    SPHX2_ARG_GEO,
    SPHX2_ARG_INDEX_WEIGHTS,
    SPHX2_ARG_FIELD_WEIGHTS,
    SPHX2_ARG_OUTPUT_FORMAT,    /* bound to the "sphx_outputtype" variable */
    SPHX2_ARG_DOCS,
    SPHX2_ARG_EXCERPT_OPTS,
    SPHX2_ARG_COUNT
} sphx2_args_t;
| static void * ngx_http_sphinx2_create_loc_conf(ngx_conf_t *cf); 50 | static char * ngx_http_sphinx2_merge_loc_conf(ngx_conf_t *cf, void 51 | *parent, void *child); 52 | 53 | static ngx_int_t ngx_http_sphinx2_handler(ngx_http_request_t *r); 54 | static ngx_int_t ngx_http_sphinx2_create_request(ngx_http_request_t *r); 55 | static ngx_int_t ngx_http_sphinx2_reinit_request(ngx_http_request_t *r); 56 | static ngx_int_t ngx_http_sphinx2_process_header(ngx_http_request_t *r); 57 | #if 0 58 | static ngx_int_t ngx_http_sphinx2_filter_init(void *data); 59 | static ngx_int_t ngx_http_sphinx2_filter(void *data, ssize_t bytes); 60 | #endif 61 | static void ngx_http_sphinx2_abort_request(ngx_http_request_t *r); 62 | static void ngx_http_sphinx2_finalize_request(ngx_http_request_t *r, 63 | ngx_int_t rc); 64 | 65 | static char * ngx_http_sphinx2_pass(ngx_conf_t *cf, ngx_command_t *cmd, 66 | void *conf); 67 | 68 | /* LOCALS */ 69 | 70 | static ngx_str_t ngx_http_sphinx2_command = ngx_string("sphinx2_command"); 71 | 72 | static ngx_str_t ngx_http_sphinx2_args[] = { 73 | ngx_string("sphx_offset"), /* SPHX2_ARG_OFFSET */ 74 | ngx_string("sphx_numresults"), /* SPHX2_ARG_NUM_RESULTS */ 75 | ngx_string("sphx_matchmode"), /* SPHX2_ARG_MATCH_MODE */ 76 | ngx_string("sphx_ranker"), /* SPHX2_ARG_RANKER */ 77 | ngx_string("sphx_rankexpr"), /* SPHX2_ARG_RANK_EXPR */ 78 | ngx_string("sphx_sortmode"), /* SPHX2_ARG_SORT_MODE */ 79 | ngx_string("sphx_sortby"), /* SPHX2_ARG_SORT_BY */ 80 | ngx_string("sphx_keywords"), /* SPHX2_ARG_KEYWORDS */ 81 | ngx_string("sphx_index"), /* SPHX2_ARG_INDEX */ 82 | ngx_string("sphx_filters"), /* SPHX2_ARG_FILTERS */ 83 | ngx_string("sphx_group"), /* SPHX2_ARG_GROUP */ 84 | ngx_string("sphx_maxmatches"), /* SPHX2_ARG_MAX_MATCHES */ 85 | ngx_string("sphx_geo"), /* SPHX2_ARG_GEO */ 86 | ngx_string("sphx_indexweights"), /* SPHX2_ARG_INDEX_WEIGHTS */ 87 | ngx_string("sphx_fieldweights"), /* SPHX2_ARG_FIELD_WEIGHTS */ 88 | ngx_string("sphx_outputtype"), /* 
SPHX2_ARG_OUTPUT_FORMAT */ 89 | ngx_string("sphx_docs"), /* SPHX2_ARG_DOCS */ 90 | ngx_string("sphx_excerpt_opts"), /* SPHX2_ARG_EXCERPT_OPTS */ 91 | }; 92 | 93 | static const char* sphx2_command_strs[] = { 94 | "search", /* SPHX2_COMMAND_SEARCH = 0, */ 95 | "excerpt", /* SPHX2_COMMAND_EXCERPT = 1 */ 96 | NULL 97 | }; 98 | 99 | /* MODULE GLOBALS */ 100 | 101 | static ngx_conf_bitmask_t ngx_http_sphinx2_next_upstream_masks[] = { 102 | { ngx_string("error"), NGX_HTTP_UPSTREAM_FT_ERROR }, 103 | { ngx_string("timeout"), NGX_HTTP_UPSTREAM_FT_TIMEOUT }, 104 | { ngx_string("invalid_response"), NGX_HTTP_UPSTREAM_FT_INVALID_HEADER }, 105 | { ngx_string("not_found"), NGX_HTTP_UPSTREAM_FT_HTTP_404 }, 106 | { ngx_string("off"), NGX_HTTP_UPSTREAM_FT_OFF }, 107 | { ngx_null_string, 0 } 108 | }; 109 | 110 | 111 | static ngx_command_t ngx_http_sphinx2_commands[] = { 112 | 113 | /* module specific commands */ 114 | { ngx_string("sphinx2_pass"), 115 | NGX_HTTP_LOC_CONF|NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1, 116 | ngx_http_sphinx2_pass, 117 | NGX_HTTP_LOC_CONF_OFFSET, 118 | 0, 119 | NULL }, 120 | 121 | /* standard ones for upstream module */ 122 | { ngx_string("sphinx2_bind"), 123 | NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1, 124 | ngx_http_upstream_bind_set_slot, 125 | NGX_HTTP_LOC_CONF_OFFSET, 126 | offsetof(ngx_http_sphinx2_loc_conf_t, upstream.local), 127 | NULL }, 128 | 129 | { ngx_string("sphinx2_connect_timeout"), 130 | NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1, 131 | ngx_conf_set_msec_slot, 132 | NGX_HTTP_LOC_CONF_OFFSET, 133 | offsetof(ngx_http_sphinx2_loc_conf_t, upstream.connect_timeout), 134 | NULL }, 135 | 136 | { ngx_string("sphinx2_send_timeout"), 137 | NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1, 138 | ngx_conf_set_msec_slot, 139 | NGX_HTTP_LOC_CONF_OFFSET, 140 | offsetof(ngx_http_sphinx2_loc_conf_t, upstream.send_timeout), 141 | NULL }, 142 | 143 | { ngx_string("sphinx2_buffer_size"), 144 | 
NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1, 145 | ngx_conf_set_size_slot, 146 | NGX_HTTP_LOC_CONF_OFFSET, 147 | offsetof(ngx_http_sphinx2_loc_conf_t, upstream.buffer_size), 148 | NULL }, 149 | 150 | { ngx_string("sphinx2_read_timeout"), 151 | NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE1, 152 | ngx_conf_set_msec_slot, 153 | NGX_HTTP_LOC_CONF_OFFSET, 154 | offsetof(ngx_http_sphinx2_loc_conf_t, upstream.read_timeout), 155 | NULL }, 156 | 157 | { ngx_string("sphinx2_next_upstream"), 158 | NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE, 159 | ngx_conf_set_bitmask_slot, 160 | NGX_HTTP_LOC_CONF_OFFSET, 161 | offsetof(ngx_http_sphinx2_loc_conf_t, upstream.upstream), 162 | &ngx_http_sphinx2_next_upstream_masks }, 163 | 164 | ngx_null_command 165 | }; 166 | 167 | 168 | static ngx_http_module_t ngx_http_sphinx2_module_ctx = { 169 | NULL, /* preconfiguration */ 170 | NULL, /* postconfiguration */ 171 | 172 | NULL, /* create main configuration */ 173 | NULL, /* init main configuration */ 174 | 175 | NULL, /* create server configuration */ 176 | NULL, /* merge server configuration */ 177 | 178 | ngx_http_sphinx2_create_loc_conf, /* create location configration */ 179 | ngx_http_sphinx2_merge_loc_conf /* merge location configration */ 180 | }; 181 | 182 | 183 | ngx_module_t ngx_http_sphinx2_module = { 184 | NGX_MODULE_V1, 185 | &ngx_http_sphinx2_module_ctx, /* module context */ 186 | ngx_http_sphinx2_commands, /* module directives */ 187 | NGX_HTTP_MODULE, /* module type */ 188 | NULL, /* init master */ 189 | NULL, /* init module */ 190 | NULL, /* init process */ 191 | NULL, /* init thread */ 192 | NULL, /* exit thread */ 193 | NULL, /* exit process */ 194 | NULL, /* exit master */ 195 | NGX_MODULE_V1_PADDING 196 | }; 197 | 198 | 199 | /* FUNCTION DEFINITIONS */ 200 | 201 | /* location conf creation */ 202 | static void* 203 | ngx_http_sphinx2_create_loc_conf(ngx_conf_t *cf) 204 | { 205 | 
ngx_http_sphinx2_loc_conf_t *conf; 206 | size_t i; 207 | 208 | if(NULL == (conf = ngx_pcalloc(cf->pool, 209 | sizeof(ngx_http_sphinx2_loc_conf_t)))) 210 | { 211 | return NULL; 212 | } 213 | 214 | /* 215 | * set by ngx_pcalloc(): 216 | * 217 | * conf->upstream.bufs.num = 0; 218 | * conf->upstream.upstream = 0; 219 | * conf->upstream.temp_path = NULL; 220 | * conf->upstream.uri = { 0, NULL }; 221 | * conf->upstream.location = NULL; 222 | */ 223 | 224 | conf->upstream.connect_timeout = NGX_CONF_UNSET_MSEC; 225 | conf->upstream.send_timeout = NGX_CONF_UNSET_MSEC; 226 | conf->upstream.read_timeout = NGX_CONF_UNSET_MSEC; 227 | 228 | conf->upstream.buffer_size = NGX_CONF_UNSET_SIZE; 229 | 230 | /* the hardcoded values */ 231 | conf->upstream.cyclic_temp_file = 0; 232 | conf->upstream.buffering = 0; 233 | conf->upstream.ignore_client_abort = 0; 234 | conf->upstream.send_lowat = 0; 235 | conf->upstream.bufs.num = 0; 236 | conf->upstream.busy_buffers_size = 0; 237 | conf->upstream.max_temp_file_size = 0; 238 | conf->upstream.temp_file_write_size = 0; 239 | conf->upstream.intercept_errors = 1; 240 | conf->upstream.intercept_404 = 1; 241 | conf->upstream.pass_request_headers = 0; 242 | conf->upstream.pass_request_body = 0; 243 | 244 | /* initialize module specific elements of the context */ 245 | conf->cmd_idx = NGX_CONF_UNSET; 246 | for(i = 0; i < SPHX2_ARG_COUNT; ++i) { 247 | conf->arg_idx[i] = NGX_CONF_UNSET; 248 | } 249 | 250 | return conf; 251 | } 252 | 253 | /* location conf merge */ 254 | static char* 255 | ngx_http_sphinx2_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child) 256 | { 257 | ngx_http_sphinx2_loc_conf_t *prev = parent; 258 | ngx_http_sphinx2_loc_conf_t *conf = child; 259 | size_t i; 260 | 261 | ngx_conf_merge_msec_value(conf->upstream.connect_timeout, 262 | prev->upstream.connect_timeout, 60000); 263 | ngx_conf_merge_msec_value(conf->upstream.send_timeout, 264 | prev->upstream.send_timeout, 60000); 265 | 
ngx_conf_merge_msec_value(conf->upstream.read_timeout, 266 | prev->upstream.read_timeout, 60000); 267 | 268 | ngx_conf_merge_size_value(conf->upstream.buffer_size, 269 | prev->upstream.buffer_size, (size_t)ngx_pagesize); 270 | 271 | ngx_conf_merge_bitmask_value(conf->upstream.next_upstream, 272 | prev->upstream.next_upstream, 273 | (NGX_CONF_BITMASK_SET | 274 | NGX_HTTP_UPSTREAM_FT_ERROR | 275 | NGX_HTTP_UPSTREAM_FT_TIMEOUT)); 276 | 277 | if (conf->upstream.next_upstream & NGX_HTTP_UPSTREAM_FT_OFF) { 278 | conf->upstream.next_upstream = 279 | NGX_CONF_BITMASK_SET | NGX_HTTP_UPSTREAM_FT_OFF; 280 | } 281 | 282 | if (conf->upstream.upstream == NULL) { 283 | conf->upstream.upstream = prev->upstream.upstream; 284 | } 285 | 286 | if(conf->cmd_idx == NGX_CONF_UNSET) { 287 | conf->cmd_idx = prev->cmd_idx; 288 | } 289 | for(i = 0; i < SPHX2_ARG_COUNT; ++i) { 290 | if(conf->arg_idx[i] == NGX_CONF_UNSET) { 291 | conf->arg_idx[i] = prev->arg_idx[i]; 292 | } 293 | } 294 | 295 | return NGX_CONF_OK; 296 | } 297 | 298 | /* pass */ 299 | static char* 300 | ngx_http_sphinx2_pass(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) 301 | { 302 | ngx_http_sphinx2_loc_conf_t *slcf = conf; 303 | ngx_str_t *value; 304 | ngx_url_t url; 305 | ngx_http_core_loc_conf_t *clcf; 306 | size_t i; 307 | 308 | if (slcf->upstream.upstream) { 309 | return "is duplicate"; 310 | } 311 | 312 | value = cf->args->elts; 313 | 314 | ngx_memzero(&url, sizeof(ngx_url_t)); 315 | 316 | url.url = value[1]; 317 | url.no_resolve = 1; 318 | 319 | slcf->upstream.upstream = ngx_http_upstream_add(cf, &url, 0); 320 | if (slcf->upstream.upstream == NULL) { 321 | return NGX_CONF_ERROR; 322 | } 323 | 324 | clcf = ngx_http_conf_get_module_loc_conf(cf, ngx_http_core_module); 325 | 326 | clcf->handler = ngx_http_sphinx2_handler; 327 | 328 | if (clcf->name.data[clcf->name.len - 1] == '/') { 329 | clcf->auto_redirect = 1; 330 | } 331 | 332 | if(NGX_ERROR == (slcf->cmd_idx = ngx_http_get_variable_index( 333 | cf, 
&ngx_http_sphinx2_command))) 334 | { 335 | ngx_log_error(NGX_LOG_ERR, cf->log, 0, 336 | "Can't get variable index for 'sphinx2_command'"); 337 | return NGX_CONF_ERROR; 338 | } 339 | 340 | for(i = 0; i < SPHX2_ARG_COUNT; ++i) { 341 | if(NGX_ERROR == (slcf->arg_idx[i] = ngx_http_get_variable_index( 342 | cf, &ngx_http_sphinx2_args[i]))) 343 | { 344 | ngx_log_error(NGX_LOG_ERR, cf->log, 0, 345 | "Can't get variable index for '%s' key", 346 | ngx_http_sphinx2_args[i].data); 347 | return NGX_CONF_ERROR; 348 | } 349 | } 350 | 351 | return NGX_CONF_OK; 352 | } 353 | 354 | /* upstream handler to provide the callbacks */ 355 | ngx_int_t 356 | ngx_http_sphinx2_handler(ngx_http_request_t *r) 357 | { 358 | ngx_int_t rc; 359 | ngx_http_upstream_t *u; 360 | ngx_http_sphinx2_ctx_t *ctx; 361 | ngx_http_sphinx2_loc_conf_t *slcf; 362 | 363 | if (!(r->method & (NGX_HTTP_GET|NGX_HTTP_HEAD))) { 364 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 365 | "sphinx2_handler: http method is not GET or HEAD"); 366 | return NGX_HTTP_NOT_ALLOWED; 367 | } 368 | 369 | if (ngx_http_set_content_type(r) != NGX_OK) { 370 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 371 | "sphinx2_handler: failed in set_content_type"); 372 | return NGX_HTTP_INTERNAL_SERVER_ERROR; 373 | } 374 | 375 | if (ngx_http_upstream_create(r) != NGX_OK) { 376 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 377 | "sphinx2_handler: failed to create upstream"); 378 | return NGX_HTTP_INTERNAL_SERVER_ERROR; 379 | } 380 | 381 | u = r->upstream; 382 | 383 | /*ngx_str_set(&u->schema, "sphinx2://"); */ 384 | ngx_str_set(&u->schema, ""); 385 | u->output.tag = (ngx_buf_tag_t) &ngx_http_sphinx2_module; 386 | 387 | slcf = ngx_http_get_module_loc_conf(r, ngx_http_sphinx2_module); 388 | 389 | u->conf = &slcf->upstream; 390 | 391 | u->create_request = ngx_http_sphinx2_create_request; 392 | u->reinit_request = ngx_http_sphinx2_reinit_request; 393 | u->process_header = ngx_http_sphinx2_process_header; 394 | u->abort_request = 
/*
 * Macros for argument parsing code for search, excerpt etc.
 *
 * Each macro expands inside a function that returns ngx_int_t and has `r`
 * (the request) and `slcf` (the location conf) in scope; all except
 * MUST_HAVE_ARG also need `input` (the structure being filled). On any
 * failure the macro executes `return NGX_ERROR` in the enclosing function.
 */

/*
 * Look up the nginx variable bound to arg_no and copy its value into a
 * pool-allocated ngx_str_t named `vvs` (data is NULL when the value is
 * empty). It declares the locals i, vv and vvs, so it must open its own
 * block. The copy is NOT NUL-terminated.
 */
#define GET_INDEXED_VARIABLE_VAL(r, slcf, arg_no) \
    ngx_int_t i = slcf->arg_idx[arg_no]; \
    ngx_http_variable_value_t * vv = ngx_http_get_indexed_variable(r, i); \
    if (vv == NULL || vv->not_found) { \
        ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, \
            "'%s' variable is not set", ngx_http_sphinx2_args[arg_no].data); \
        return NGX_ERROR; \
    } \
    ngx_str_t* vvs = ngx_palloc(r->pool, sizeof(ngx_str_t)); \
    if(NULL == vvs) { return NGX_ERROR; } \
    if(vv->len != 0) { \
        if(NULL == (vvs->data = ngx_palloc(r->pool, vv->len))) { \
            ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, \
                "Failed to allocate while getting indexed variable"); \
            return NGX_ERROR; \
        } \
        memcpy(vvs->data, vv->data, vv->len); \
    } else { vvs->data = NULL; } \
    vvs->len = vv->len;

/* Fail unless the argument's variable is set and non-empty. */
#define MUST_HAVE_ARG(arg_no) \
    do { \
        GET_INDEXED_VARIABLE_VAL(r, slcf, arg_no); \
        if(0 == vvs->len) { \
            ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, \
                "No value for arg '%s' specified\n", \
                ngx_http_sphinx2_args[arg_no].data); \
            return NGX_ERROR; \
        } \
    } while(0)

/* Store the (possibly empty) string value in input->var. */
#define GET_ARG(arg_no, var) \
    do { \
        GET_INDEXED_VARIABLE_VAL(r, slcf, arg_no); \
        input->var = vvs; \
    } while(0)

/*
 * Store the argument as a non-negative decimal integer in input->var, or
 * dflt when the value is empty.
 *
 * The value is parsed with ngx_atoi(), which takes an explicit length. The
 * copy made by GET_INDEXED_VARIABLE_VAL has no terminating NUL, so atoi()
 * would read past the end of the allocation. Anything that is not a plain
 * run of digits, or that exceeds INT32_MAX, is rejected rather than being
 * silently truncated.
 */
#define PARSE_INT_ARG(arg_no, var, dflt) \
    do { \
        GET_INDEXED_VARIABLE_VAL(r, slcf, arg_no); \
        if(vvs->len != 0) { \
            ngx_int_t parsed_ = ngx_atoi(vvs->data, vvs->len); \
            if(NGX_ERROR == parsed_ || parsed_ > (ngx_int_t)0x7fffffff) { \
                ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, \
                    "Invalid integer value for arg '%s'", \
                    ngx_http_sphinx2_args[arg_no].data); \
                return NGX_ERROR; \
            } \
            input->var = parsed_; \
        } else { input->var = dflt; } \
    } while(0)

/*
 * Parse a non-empty value with sphx2_parse_<key>_str() into the list
 * input-><key> and its element count input->num_<key>. An empty value
 * leaves both untouched.
 */
#define PARSE_LIST_ARG(arg_no, key) \
    do { \
        GET_INDEXED_VARIABLE_VAL(r, slcf, arg_no); \
        if(vvs->len != 0 && NGX_OK != sphx2_parse_ ## key ## _str( \
                r->pool, vvs, &(input->key), &(input->num_ ## key))) \
        { return NGX_ERROR; } \
    } while(0)

/*
 * Parse a non-empty value with sphx2_parse_<key>_str() into input-><key>.
 * An empty value leaves the field untouched.
 */
#define PARSE_ELEM_ARG(arg_no, key) \
    do { \
        GET_INDEXED_VARIABLE_VAL(r, slcf, arg_no); \
        if(vvs->len != 0 && NGX_OK != sphx2_parse_ ## key ## _str( \
                r->pool, vvs, &input->key)) \
        { return NGX_ERROR; } \
    } while(0)

/*
 * Same as PARSE_ELEM_ARG, but the parser is called even for an empty value,
 * so it gets the chance to produce a default object.
 */
#define PARSE_ELEM_ARG_2(arg_no, key) \
    do { \
        GET_INDEXED_VARIABLE_VAL(r, slcf, arg_no); \
        if(NGX_OK != sphx2_parse_ ## key ## _str( \
                r->pool, vvs, &input->key)) \
        { return NGX_ERROR; } \
    } while(0)
if(SPHX2_RANK_EXPR == input->ranker) { 519 | GET_ARG(SPHX2_ARG_RANK_EXPR, rank_expr); 520 | } 521 | 522 | /* sort_mode */ 523 | PARSE_ELEM_ARG(SPHX2_ARG_SORT_MODE, sort_mode); 524 | 525 | /* sort by */ 526 | if(SPHX2_SORT_ATTR_ASC == input->sort_mode || 527 | SPHX2_SORT_ATTR_DESC == input->sort_mode) 528 | { 529 | GET_ARG(SPHX2_ARG_SORT_BY, sort_by); 530 | } else { 531 | input->sort_by = &s_empty_str; 532 | } 533 | 534 | /* keywords */ 535 | GET_ARG(SPHX2_ARG_KEYWORDS, keywords); 536 | 537 | /* index */ 538 | GET_ARG(SPHX2_ARG_INDEX, index); 539 | 540 | /* filters */ 541 | PARSE_LIST_ARG(SPHX2_ARG_FILTERS, filters); 542 | 543 | /* group */ 544 | PARSE_ELEM_ARG_2(SPHX2_ARG_GROUP, group); 545 | 546 | /* max matches */ 547 | PARSE_INT_ARG(SPHX2_ARG_MAX_MATCHES, max_matches, 548 | sphx2_default_max_matches); 549 | 550 | /* geo */ 551 | PARSE_ELEM_ARG(SPHX2_ARG_GEO, geo); 552 | 553 | /* index weights */ 554 | PARSE_LIST_ARG(SPHX2_ARG_INDEX_WEIGHTS, index_weights); 555 | 556 | /* field weights */ 557 | PARSE_LIST_ARG(SPHX2_ARG_FIELD_WEIGHTS, field_weights); 558 | 559 | /* Output format */ 560 | PARSE_ELEM_ARG(SPHX2_ARG_OUTPUT_FORMAT, output_type); 561 | 562 | return(NGX_OK); 563 | } 564 | 565 | static ngx_int_t 566 | ngx_http_sphinx2_parse_excerpt_args( 567 | ngx_http_request_t * r, 568 | ngx_http_sphinx2_loc_conf_t * slcf, 569 | sphx2_excerpt_input_t * input) 570 | { 571 | MUST_HAVE_ARG(SPHX2_ARG_KEYWORDS); 572 | 573 | MUST_HAVE_ARG(SPHX2_ARG_INDEX); 574 | 575 | /* keywords */ 576 | GET_ARG(SPHX2_ARG_KEYWORDS, keywords); 577 | 578 | /* index */ 579 | GET_ARG(SPHX2_ARG_INDEX, index); 580 | 581 | /* docs */ 582 | PARSE_LIST_ARG(SPHX2_ARG_DOCS, docs); 583 | 584 | /* excerpt opts */ 585 | PARSE_ELEM_ARG_2(SPHX2_ARG_EXCERPT_OPTS, excerpt_opts); 586 | 587 | sphx2_create_opts_flag(input->excerpt_opts); 588 | 589 | return(NGX_OK); 590 | } 591 | 592 | 593 | /* create request callback */ 594 | static ngx_int_t 595 | ngx_http_sphinx2_create_request(ngx_http_request_t *r) 596 | { 
597 | ngx_buf_t * b; 598 | ngx_chain_t * cl; 599 | ngx_http_sphinx2_loc_conf_t * slcf; 600 | ngx_http_sphinx2_ctx_t * ctx; 601 | ngx_http_variable_value_t * vv; 602 | sphx2_input_t input; 603 | sphx2_command_t cmd; 604 | ngx_str_t dbg; 605 | 606 | slcf = ngx_http_get_module_loc_conf(r, ngx_http_sphinx2_module); 607 | 608 | /* find the sphinx2 command */ 609 | vv = ngx_http_get_indexed_variable(r, slcf->cmd_idx); 610 | 611 | if (vv == NULL || vv->not_found || vv->len == 0) { 612 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 613 | "the \"sphinx2_command\" variable is not set"); 614 | return NGX_ERROR; 615 | } 616 | 617 | for(cmd = SPHX2_COMMAND_SEARCH; cmd < SPHX2_COMMAND_COUNT; ++cmd) { 618 | if(!strncmp(sphx2_command_strs[cmd], 619 | (const char*)vv->data, vv->len)) { 620 | break; 621 | } 622 | } 623 | 624 | if(cmd == SPHX2_COMMAND_COUNT) { 625 | dbg.data = vv->data; dbg.len = vv->len; 626 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 627 | "Sphinx command \"%V\" is not recognized", &dbg); 628 | return(NGX_ERROR); 629 | } 630 | 631 | ctx = ngx_http_get_module_ctx(r, ngx_http_sphinx2_module); 632 | 633 | switch(cmd) { 634 | case SPHX2_COMMAND_SEARCH: 635 | memset(&input, 0, sizeof(input)); 636 | if(NGX_OK != ngx_http_sphinx2_parse_search_args( 637 | r, slcf, &input.srch)) { 638 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 639 | "Sphinx2 query args parse error"); 640 | return(NGX_ERROR); 641 | } 642 | if(NGX_ERROR == sphx2_create_search_request(r->pool, 643 | &input.srch, &b)) 644 | { 645 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 646 | "Sphinx2 upstream search req creation failed"); 647 | return(NGX_ERROR); 648 | } 649 | break; 650 | case SPHX2_COMMAND_EXCERPT: 651 | memset(&input, 0, sizeof(input)); 652 | if(NGX_OK != ngx_http_sphinx2_parse_excerpt_args( 653 | r, slcf, &input.exrp)) { 654 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 655 | "Sphinx2 query args parse error"); 656 | return(NGX_ERROR); 657 | } 658 | if(NGX_ERROR == 
sphx2_create_excerpt_request(r->pool, 659 | &input.exrp, &b)) 660 | { 661 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 662 | "Sphinx2 upstream search req creation failed"); 663 | return(NGX_ERROR); 664 | } 665 | break; 666 | default: 667 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 668 | "Sphinx2 upstream unsupported req type - %u", cmd); 669 | return NGX_ERROR; 670 | } 671 | 672 | ctx->command = cmd; 673 | 674 | cl = ngx_alloc_chain_link(r->pool); 675 | if (cl == NULL) { 676 | return NGX_ERROR; 677 | } 678 | 679 | cl->buf = b; 680 | cl->next = NULL; 681 | 682 | r->upstream->request_bufs = cl; 683 | 684 | ctx->request = r; 685 | 686 | dbg.data = b->pos; 687 | dbg.len = b->last - b->pos; 688 | 689 | ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, 690 | "sphinx2 request: \"%V\"", &dbg); 691 | 692 | return NGX_OK; 693 | } 694 | 695 | 696 | static ngx_int_t 697 | ngx_http_sphinx2_reinit_request(ngx_http_request_t *r) 698 | { 699 | return NGX_OK; 700 | } 701 | 702 | 703 | static ngx_int_t 704 | ngx_http_sphinx2_process_header(ngx_http_request_t *r) 705 | { 706 | ngx_http_upstream_t * u; 707 | ngx_http_sphinx2_ctx_t * ctx; 708 | ngx_buf_t * b; 709 | ngx_int_t status; 710 | 711 | u = r->upstream; 712 | b = &u->buffer; 713 | 714 | /* not enough bytes read to parse status */ 715 | if((b->last - b->pos) < (ssize_t)sphx2_min_search_header_len) { 716 | return(NGX_AGAIN); 717 | } 718 | 719 | ctx = ngx_http_get_module_ctx(r, ngx_http_sphinx2_module); 720 | 721 | switch(ctx->command) { 722 | case SPHX2_COMMAND_SEARCH: 723 | if(NGX_OK != (status = 724 | sphx2_parse_search_response_header(r->pool, b, &ctx->repctx.srch))) 725 | { 726 | ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 727 | "Sphinx2 upstream error processing search response header"); 728 | return status; 729 | } 730 | break; 731 | case SPHX2_COMMAND_EXCERPT: 732 | if(NGX_OK != (status = 733 | sphx2_parse_excerpt_response_header(r->pool, b, &ctx->repctx.exrp))) 734 | { 735 | 
ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 736 | "Sphinx2 upstream error processing excerpt response header"); 737 | return status; 738 | } 739 | break; 740 | default: 741 | return(NGX_ERROR); 742 | } 743 | 744 | u->headers_in.status_n = NGX_HTTP_OK; 745 | u->state->status = NGX_HTTP_OK; 746 | 747 | return NGX_OK; 748 | } 749 | 750 | #if 0 751 | static ngx_int_t 752 | ngx_http_sphinx2_filter_init(void *data) 753 | { 754 | return NGX_OK; 755 | } 756 | 757 | 758 | static ngx_int_t 759 | ngx_http_sphinx2_filter(void *data, ssize_t bytes) 760 | { 761 | /*ngx_http_sphinx2_ctx_t *ctx = data;*/ 762 | 763 | /* TODO filter impl here */ 764 | return 0; /*ctx->filter(ctx, bytes);*/ 765 | } 766 | #endif 767 | 768 | static void 769 | ngx_http_sphinx2_abort_request(ngx_http_request_t *r) 770 | { 771 | ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, 772 | "abort http sphinx2 request"); 773 | return; 774 | } 775 | 776 | 777 | static void 778 | ngx_http_sphinx2_finalize_request(ngx_http_request_t *r, ngx_int_t rc) 779 | { 780 | ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, 781 | "finalize http sphinx2 request"); 782 | return; 783 | } 784 | -------------------------------------------------------------------------------- /src/ngx_http_sphinx2_sphx.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sphinx2 protocol functionality 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "ngx_http_sphinx2_sphx.h" 10 | #include "ngx_http_sphinx2_args_parser.h" 11 | #include "ngx_http_sphinx2_stream.h" 12 | 13 | 14 | /* MACROS */ 15 | 16 | #define LIST_ADD(l, n, sz) { (n)->next = (l); (l) = (n); ++(sz); } 17 | 18 | /* GLOBALS */ 19 | 20 | sphx2_match_mode_t sphx2_default_match_mode = SPHX2_MATCH_ALL; 21 | 22 | sphx2_ranker_t sphx2_default_ranker = SPHX2_RANK_PROXIMITY_BM25; 23 | 24 | sphx2_sort_mode_t sphx2_default_sort_mode = SPHX2_SORT_RELEVANCE; 25 | 26 | sphx2_output_type_t 
/*
 * Textual names of the sort modes, in sphx2_sort_mode_t order (the enum
 * value of each entry is given alongside it).
 *
 * "extended" was previously misspelled "extendex", so the natural spelling
 * of the SPHX2_SORT_EXTENDED keyword could not match this table.
 * NOTE(review): a client that relied on the misspelled keyword must switch
 * to "extended".
 */
static const char* s_sort_mode_strs[] = {
    "relevance",    /* SPHX2_SORT_RELEVANCE        = 0, */
    "attr_desc",    /* SPHX2_SORT_ATTR_DESC        = 1, */
    "attr_asc",     /* SPHX2_SORT_ATTR_ASC         = 2, */
    "time_seg",     /* SPHX2_SORT_TIME_SEGMENTS    = 3, */
    "extended",     /* SPHX2_SORT_EXTENDED         = 4, */
    "expr",         /* SPHX2_SORT_EXPR             = 5  */
};

/* Number of entries in s_sort_mode_strs. */
static const size_t sz_sort_mode_strs =
    sizeof(s_sort_mode_strs) / sizeof(const char*);
/*
 * Debug helper: print `len` bytes starting at `ptr` to stderr on one line.
 * Printable bytes are shown as characters, everything else as two lowercase
 * hex digits.
 *
 * Each byte is handled as unsigned char. With a plain (possibly signed)
 * char, bytes >= 0x80 would be negative, which is not a valid argument for
 * isprint() and would be printed sign-extended.
 */
static void s_dump_buffer(u_char* ptr, size_t len) {
    size_t          i;
    unsigned char   c;

    /* size_t is not necessarily unsigned long; convert to match %lu */
    fprintf(stderr, "buf [%lu]: ", (unsigned long)len);

    for(i = 0; i < len; ++i) {
        c = ptr[i];
        if(isprint(c)) {
            fprintf(stderr, "%c", c);
        } else {
            fprintf(stderr, "%02x", (unsigned int)c);
        }
    }

    fprintf(stderr, "\n");
}
request */ 151 | 152 | /* 153 | * Assumptions/Limitations in implementation of searchd protocol for Search 154 | * 155 | * -- which elements of search request are not supported 156 | * -- supported only with particular values 157 | * 158 | * 1 deprecated 'weights' field not supported. corresponding num-weights 159 | * field is always 0. 160 | * 161 | * 2 'values' filter is not supported. only 'range' and 'float range'. 162 | * 163 | * 3 'overrides' not supported. field having number of overrides is 0 always. 164 | * 165 | * 4 For distributed search - 0 is default for cutoff, retrycount, retrydelay 166 | * 167 | * 5 'maxquerytime' is 0 (unlimited) 168 | */ 169 | 170 | static size_t 171 | s_sphx2_search_request_len(sphx2_search_input_t * srch_input) 172 | { 173 | /* Default part of the request */ 174 | 175 | /* uint32_t vars: 176 | * offset, limit, mode, ranker, sort mode, sort by len, 177 | * keywords len, [depre] num weights (always 0), index len, 178 | * id range marker, num filters, group type, group by len, 179 | * max matches, group sort len, cutoff, retry count, 180 | * retry delay, group distinct len, is geo there?, 181 | * num index weights, max query time, num field weights, 182 | * comment len, num overrides, select len 183 | * 184 | * uint64_t vars: 185 | * [depre] idx min, [depre] idx max 186 | */ 187 | 188 | static const size_t num_default_32s = 26, 189 | num_default_64s = 2; 190 | 191 | size_t request_len = num_default_32s * sz32 + num_default_64s * sz64; 192 | 193 | sphx2_filter_t* f; 194 | sphx2_weight_t* w; 195 | 196 | size_t i; 197 | 198 | /* Calculate variable length of the request */ 199 | request_len += 200 | ((SPHX2_RANK_EXPR==srch_input->ranker) /* rank expr */ 201 | ? 
(sz32 + srch_input->rank_expr->len) : 0) 202 | + srch_input->sort_by->len /* sort by */ 203 | + srch_input->keywords->len /* keywords */ 204 | + srch_input->index->len /* index */ 205 | + srch_input->group->attr->len + /* group by attr */ 206 | + srch_input->group->sort->len /* group sort type */ 207 | + srch_input->group->distinct->len /* group distinct */ 208 | + ((NULL != srch_input->geo) /* geo */ 209 | ? (2 * szf + 2 * sz32 + srch_input->geo->lat_attr->len + 210 | srch_input->geo->lon_attr->len) : 0) 211 | + empty_str.len /* comment */ 212 | + default_select.len;/* select */ 213 | ; 214 | 215 | /* Filters */ 216 | f = srch_input->filters; 217 | for(i = 0; i < srch_input->num_filters; ++i) { 218 | request_len += 219 | (sz32 + f->attr->len) /* attr */ + 2 * sz32; /* type, exclude */ 220 | switch(f->type) { 221 | /*case SPHX2_FILTER_VALUES: 222 | request_len += (sz32 + sz64 * f->spec.vals.n); 223 | break;*/ 224 | case SPHX2_FILTER_RANGE: request_len += 2 * sz64; break; 225 | case SPHX2_FILTER_FLOATRANGE: request_len += 2 * szf; break; 226 | default: return (NGX_ERROR); 227 | } 228 | f = f->next; 229 | } 230 | 231 | /* Weights */ 232 | w = srch_input->index_weights; /* index weights */ 233 | for(i = 0; i < srch_input->num_index_weights; ++i) { 234 | request_len += (sz32 + w->entity->len + sz32); /* idx, weight */ 235 | w = w->next; 236 | } 237 | w = srch_input->field_weights; /* field weights */ 238 | for(i = 0; i < srch_input->num_field_weights; ++i) { 239 | request_len += (sz32 + w->entity->len + sz32); /* field, weight */ 240 | w = w->next; 241 | } 242 | 243 | return(request_len); 244 | } 245 | 246 | static ngx_int_t 247 | s_write_filters_to_stream( 248 | sphx2_search_input_t * input, 249 | sphx2_stream_t * st) 250 | { 251 | size_t i; 252 | ngx_int_t status; 253 | sphx2_filter_t * f; 254 | 255 | f = input->filters; 256 | 257 | for(i = 0; i < input->num_filters; ++i) { 258 | status = 259 | sphx2_stream_write_string(st, f->attr) 260 | || 
sphx2_stream_write_int32(st, (uint32_t)f->type) 261 | || ((SPHX2_FILTER_RANGE == f->type) 262 | ? ( sphx2_stream_write_int64(st, f->spec.ir.min) 263 | || sphx2_stream_write_int64(st, f->spec.ir.max)) 264 | : ( sphx2_stream_write_float(st, (float)f->spec.fr.min) 265 | || sphx2_stream_write_float(st, (float)f->spec.fr.max))) 266 | || sphx2_stream_write_int32(st, (uint32_t)f->exclude); 267 | 268 | if(NGX_OK != status) return(status); 269 | 270 | f = f->next; 271 | } 272 | 273 | return(NGX_OK); 274 | } 275 | 276 | static ngx_int_t 277 | s_write_weights_to_stream( 278 | uint32_t num_weights, 279 | sphx2_weight_t * weights, 280 | sphx2_stream_t * st) 281 | { 282 | size_t i; 283 | ngx_int_t status; 284 | sphx2_weight_t * w; 285 | 286 | w = weights; 287 | 288 | for(i = 0; i < num_weights; ++i) { 289 | status = 290 | sphx2_stream_write_string(st, w->entity) 291 | || sphx2_stream_write_int32(st, (uint32_t)w->weight); 292 | 293 | if(NGX_OK != status) return(status); 294 | 295 | w = w->next; 296 | } 297 | 298 | return(NGX_OK); 299 | } 300 | 301 | ngx_int_t 302 | sphx2_create_search_request( 303 | ngx_pool_t * pool, 304 | sphx2_search_input_t * input, 305 | ngx_buf_t ** b) 306 | { 307 | size_t request_len = s_sphx2_search_request_len(input); 308 | 309 | /* data to send = 310 | * handshake = version [4] 311 | * . header = command [2] . command_version [2] . bytes following [4] 312 | * . 0 [4] . num_queries [4] 313 | * . 
request [request_len] 314 | */ 315 | size_t buf_len = 316 | (2 * sz16 + 4 * sz32) + request_len; 317 | 318 | sphx2_stream_t* st = sphx2_stream_create(pool); 319 | 320 | ngx_int_t status; 321 | 322 | if(NGX_ERROR == sphx2_stream_alloc(st, buf_len)) { 323 | return(NGX_ERROR); 324 | } 325 | 326 | status = 327 | /* handshake */ 328 | sphx2_stream_write_int32(st, (uint32_t)SPHX2_CLI_VERSION) 329 | /* header - command */ 330 | || sphx2_stream_write_int16(st, (uint16_t)SPHX2_COMMAND_SEARCH) 331 | /* header - command ver */ 332 | || sphx2_stream_write_int16(st, (uint16_t)SPHX2_VER_COMMAND_SEARCH) 333 | /* bytes after this variable in the request */ 334 | || sphx2_stream_write_int32(st, (uint32_t)(request_len + 2 * sz32)) 335 | || sphx2_stream_write_int32(st, (uint32_t)0) /* 0 in 2.x */ 336 | || sphx2_stream_write_int32(st, (uint32_t)1) /* query count */ 337 | /* the single search query request here onwards ... */ 338 | || sphx2_stream_write_int32(st, (uint32_t)input->offset) 339 | || sphx2_stream_write_int32(st, (uint32_t)input->num_results) 340 | || sphx2_stream_write_int32(st, (uint32_t)input->match_mode) 341 | || sphx2_stream_write_int32(st, (uint32_t)input->ranker) 342 | || ((SPHX2_RANK_EXPR == input->ranker) 343 | ? sphx2_stream_write_string(st, input->rank_expr) 344 | : NGX_OK) 345 | || sphx2_stream_write_int32(st, (uint32_t)input->sort_mode) 346 | || sphx2_stream_write_string(st, input->sort_by) 347 | || sphx2_stream_write_string(st, input->keywords) 348 | || sphx2_stream_write_int32(st, (uint32_t)0) /* [d] weights count */ 349 | || sphx2_stream_write_string(st, input->index) 350 | || sphx2_stream_write_int32(st, (uint32_t)1) /* [d] range marker */ 351 | || sphx2_stream_write_int64(st, (uint64_t)0) /* [d] min */ 352 | || sphx2_stream_write_int64(st, (uint64_t)0) /* [d] max */ 353 | || sphx2_stream_write_int32(st, (uint32_t)input->num_filters) 354 | || ((0 != input->num_filters) 355 | ? 
s_write_filters_to_stream(input, st) 356 | : NGX_OK) 357 | || sphx2_stream_write_int32(st, (uint32_t)input->group->type) 358 | || sphx2_stream_write_string(st, input->group->attr) 359 | || sphx2_stream_write_int32(st, (uint32_t)input->max_matches) 360 | || sphx2_stream_write_string(st, input->group->sort) 361 | || sphx2_stream_write_int32(st, (uint32_t)0) /* cut off */ 362 | || sphx2_stream_write_int32(st, (uint32_t)0) /* retry count */ 363 | || sphx2_stream_write_int32(st, (uint32_t)0) /* retry delay */ 364 | || sphx2_stream_write_string(st, input->group->distinct) 365 | || ((NULL != input->geo) 366 | ? ( sphx2_stream_write_int32(st, (uint32_t)1) /* have geo */ 367 | || sphx2_stream_write_string(st, input->geo->lat_attr) 368 | || sphx2_stream_write_string(st, input->geo->lon_attr) 369 | || sphx2_stream_write_float(st, input->geo->lat) 370 | || sphx2_stream_write_float(st, input->geo->lon)) 371 | : sphx2_stream_write_int32(st, (uint32_t)0)) /* no geo */ 372 | || sphx2_stream_write_int32(st, (uint32_t)input->num_index_weights) 373 | || ((0 != input->num_index_weights) 374 | ? s_write_weights_to_stream(input->num_index_weights, 375 | input->index_weights, st) 376 | : NGX_OK) 377 | || sphx2_stream_write_int32(st, (uint32_t)0) /* maxqtime - no limit */ 378 | || sphx2_stream_write_int32(st, (uint32_t)input->num_field_weights) 379 | || ((0 != input->num_field_weights) 380 | ? 
s_write_weights_to_stream(input->num_field_weights, 381 | input->field_weights, st) 382 | : NGX_OK) 383 | || sphx2_stream_write_string(st, &empty_str) /* empty comments */ 384 | || sphx2_stream_write_int32(st, (uint32_t)0) /* [us] num overrides */ 385 | || sphx2_stream_write_string(st, &default_select) /* select all attrs */ 386 | ; 387 | 388 | *b = sphx2_stream_get_buf(st); 389 | 390 | s_dump_buffer((*b)->pos, buf_len); 391 | 392 | return status; 393 | } 394 | 395 | /* Functions to handle excerpt request */ 396 | 397 | static size_t 398 | s_sphx2_excerpt_request_len(sphx2_excerpt_input_t * input) 399 | { 400 | /* constant part of the request */ 401 | 402 | /* uint32 elements 403 | * 404 | * mode, flags, index len, keywords len, before match len, 405 | * after match len, chunk separator len, limit, around, 406 | * limit passages, limit words, start passage id, html strip mode len, 407 | * passage boundary len, docs count 408 | */ 409 | 410 | static const size_t num_default_32s = 15; 411 | 412 | size_t request_len = sz32 * num_default_32s, i; 413 | 414 | sphx2_doc_t * doc; 415 | 416 | /* variable part */ 417 | request_len += input->index->len 418 | + input->keywords->len 419 | + input->excerpt_opts->before_match->len 420 | + input->excerpt_opts->after_match->len 421 | + input->excerpt_opts->chunk_separator->len 422 | + input->excerpt_opts->html_strip_mode->len 423 | + input->excerpt_opts->passage_boundary->len; 424 | 425 | doc = input->docs; 426 | for(i = 0; i < input->num_docs; ++i) { 427 | request_len += (sz32 + doc->doc->len); 428 | doc = doc->next; 429 | } 430 | 431 | return(request_len); 432 | } 433 | 434 | static ngx_int_t 435 | s_write_docs_to_stream( 436 | uint32_t num_docs, 437 | sphx2_doc_t * docs, 438 | sphx2_stream_t * st) 439 | { 440 | size_t i; 441 | ngx_int_t status; 442 | sphx2_doc_t * d; 443 | 444 | d = docs; 445 | 446 | for(i = 0; i < num_docs; ++i) { 447 | status = sphx2_stream_write_string(st, d->doc); 448 | 449 | if(NGX_OK != status) 
return(status); 450 | 451 | d = d->next; 452 | } 453 | 454 | return(NGX_OK); 455 | } 456 | 457 | ngx_int_t 458 | sphx2_create_excerpt_request( 459 | ngx_pool_t * pool, 460 | sphx2_excerpt_input_t * input, 461 | ngx_buf_t ** b) 462 | { 463 | size_t request_len = s_sphx2_excerpt_request_len(input); 464 | 465 | /* data to send = 466 | * handshake = version [4] 467 | * . header = command [2] . command_version [2] . bytes following [4] 468 | * . request [request_len] 469 | */ 470 | size_t buf_len = (2 * sz16 + 2 * sz32) + request_len; 471 | 472 | sphx2_stream_t* st = sphx2_stream_create(pool); 473 | 474 | ngx_int_t status; 475 | 476 | if(NGX_ERROR == sphx2_stream_alloc(st, buf_len)) { 477 | return(NGX_ERROR); 478 | } 479 | 480 | status = 481 | /* handshake */ 482 | sphx2_stream_write_int32(st, (uint32_t)SPHX2_CLI_VERSION) 483 | /* header - command */ 484 | || sphx2_stream_write_int16(st, (uint16_t)SPHX2_COMMAND_EXCERPT) 485 | /* header - command ver */ 486 | || sphx2_stream_write_int16(st, (uint16_t)SPHX2_VER_COMMAND_EXCERPT) 487 | /* bytes after this variable in the request */ 488 | || sphx2_stream_write_int32(st, (uint32_t)request_len) 489 | /* the excerpt request here onwards ... 
*/ 490 | || sphx2_stream_write_int32(st, (uint32_t)0) /* mode = 0 */ 491 | || sphx2_stream_write_int32(st, input->excerpt_opts->opts_flag) 492 | || sphx2_stream_write_string(st, input->index) 493 | || sphx2_stream_write_string(st, input->keywords) 494 | || sphx2_stream_write_string(st, input->excerpt_opts->before_match) 495 | || sphx2_stream_write_string(st, input->excerpt_opts->after_match) 496 | || sphx2_stream_write_string(st, input->excerpt_opts->chunk_separator) 497 | || sphx2_stream_write_int32(st, input->excerpt_opts->limit) 498 | || sphx2_stream_write_int32(st, input->excerpt_opts->around) 499 | || sphx2_stream_write_int32(st, input->excerpt_opts->limit_passages) 500 | || sphx2_stream_write_int32(st, input->excerpt_opts->limit_words) 501 | || sphx2_stream_write_int32(st, input->excerpt_opts->start_passage_id) 502 | || sphx2_stream_write_string(st, input->excerpt_opts->html_strip_mode) 503 | || sphx2_stream_write_string(st, input->excerpt_opts->passage_boundary) 504 | || sphx2_stream_write_int32(st, input->num_docs) 505 | || ((0 != input->num_docs) 506 | ? 
s_write_docs_to_stream(input->num_docs, input->docs, st) 507 | : NGX_OK) 508 | ; 509 | 510 | *b = sphx2_stream_get_buf(st); 511 | 512 | s_dump_buffer((*b)->pos, buf_len); 513 | 514 | return status; 515 | } 516 | 517 | /* Functions to work with searchd response */ 518 | 519 | static ngx_int_t 520 | s_sphx2_parse_response_header( 521 | ngx_pool_t * pool, 522 | ngx_buf_t * b, 523 | uint32_t * len) 524 | { 525 | ngx_int_t status; 526 | sphx2_stream_t* st; 527 | uint32_t searchd_proto; 528 | uint16_t sphx_status; 529 | uint16_t version; 530 | 531 | if(NULL == (st = sphx2_stream_create(pool))) { 532 | return(NGX_ERROR); 533 | } 534 | 535 | if(NGX_ERROR == sphx2_stream_set_buf(st, b)) { 536 | return(NGX_ERROR); 537 | } 538 | 539 | if(NGX_ERROR == (status = 540 | sphx2_stream_read_int32(st, &searchd_proto) 541 | || sphx2_stream_read_int16(st, &sphx_status) 542 | || sphx2_stream_read_int16(st, &version))) 543 | { 544 | return(NGX_HTTP_UPSTREAM_INVALID_HEADER); 545 | } 546 | 547 | if(SPHX2_SEARCHD_PROTO != searchd_proto) { 548 | return(NGX_HTTP_UPSTREAM_INVALID_HEADER); 549 | } 550 | 551 | switch(sphx_status) { 552 | case SPHX2_SEARCHD_OK: 553 | case SPHX2_SEARCHD_ERROR: 554 | case SPHX2_SEARCHD_RETRY: 555 | case SPHX2_SEARCHD_WARNING: 556 | if(NGX_ERROR == sphx2_stream_read_int32(st, len)) { 557 | return(NGX_HTTP_UPSTREAM_INVALID_HEADER); 558 | } 559 | s_dump_buffer(b->pos, *len); 560 | break; 561 | default: 562 | return(NGX_HTTP_UPSTREAM_INVALID_HEADER); 563 | } 564 | 565 | return(NGX_OK); 566 | } 567 | 568 | ngx_int_t 569 | sphx2_parse_search_response_header( 570 | ngx_pool_t * pool, 571 | ngx_buf_t * b, 572 | sphx2_search_response_ctx_t * ctx) 573 | { 574 | return(s_sphx2_parse_response_header(pool, b, &ctx->len)); 575 | } 576 | 577 | ngx_int_t 578 | sphx2_parse_excerpt_response_header( 579 | ngx_pool_t * pool, 580 | ngx_buf_t * b, 581 | sphx2_excerpt_response_ctx_t * ctx) 582 | { 583 | return(s_sphx2_parse_response_header(pool, b, &ctx->len)); 584 | } 585 | 586 | /* 
Functions to work with URL query param arg parsing */ 587 | 588 | #define DEFINE_ENUM_ARG_PARSE_FUNCTION(key) \ 589 | ngx_int_t \ 590 | sphx2_parse_ ## key ## _str( \ 591 | ngx_pool_t * pool, \ 592 | ngx_str_t * key ##_str, \ 593 | sphx2_## key ##_t * key) \ 594 | { \ 595 | int32_t i = 0; \ 596 | \ 597 | sphx2_arg_parse_hint_t s_ ## key ## _hints[] = \ 598 | { \ 599 | { SPHX2_ARG_TYPE_ENUM, s_ ## key ## _strs, sz_ ## key ## _strs }, \ 600 | { SPHX2_ARG_TYPE_NONE, NULL, 0 } \ 601 | }; \ 602 | \ 603 | if(NULL == key ## _str || 0 == key ## _str->len) { \ 604 | *key = sphx2_default_ ## key; \ 605 | return(NGX_OK); \ 606 | } \ 607 | \ 608 | if(NGX_ERROR == sphx2_arg_parse_register(&s_main_ctxt, \ 609 | pool, (char*)key ## _str->data, s_ ## key ## _hints, s_no_delim)) \ 610 | { \ 611 | return(NGX_ERROR); \ 612 | } \ 613 | \ 614 | if(NGX_ERROR == (i = \ 615 | sphx2_arg_parse_get_enum_arg(&s_main_ctxt, NULL, 0))) \ 616 | { \ 617 | return(NGX_ERROR); \ 618 | } \ 619 | \ 620 | *key = (sphx2_ ## key ## _t)i; \ 621 | return NGX_OK; \ 622 | } 623 | 624 | #define MULTI_ARG_PARSE_FUNCTION_SIGNATURE(key) \ 625 | ngx_int_t \ 626 | sphx2_parse_ ## key ## s_str( \ 627 | ngx_pool_t * pool, \ 628 | ngx_str_t * key ## s_str, \ 629 | sphx2_## key ## _t ** key ## s, \ 630 | uint32_t * num_ ## key ## s) 631 | 632 | #define MULTI_ARG_PARSE_FUNCTION_BODY(key, elem_delim) \ 633 | sphx2_ ## key ## _t *key; \ 634 | \ 635 | assert(NULL != key ## s_str && 0 != key ## s_str->len); \ 636 | \ 637 | *key ## s = NULL; \ 638 | *num_ ## key ## s = 0; \ 639 | \ 640 | if(NGX_ERROR == sphx2_arg_parse_register(&s_main_ctxt, \ 641 | pool, (char*)key ## s_str->data, NULL, s_multi_delim)) \ 642 | { \ 643 | return(NGX_ERROR); \ 644 | } \ 645 | \ 646 | while(NGX_ERROR != sphx2_arg_step(&s_main_ctxt)) { \ 647 | \ 648 | if(NGX_ERROR == sphx2_arg_parse_register_child( \ 649 | &s_sec_ctxt, &s_main_ctxt, pool, \ 650 | s_ ## key ## _hints, elem_delim)) \ 651 | { \ 652 | return(NGX_ERROR); \ 653 | } \ 654 | \ 655 | 
if(NULL == (key = \ 656 | ngx_pcalloc(pool, sizeof(sphx2_ ## key ## _t)))) \ 657 | { \ 658 | return NGX_ERROR; \ 659 | } \ 660 | \ 661 | if(NGX_ERROR == sphx2_arg_parse_whole_using_hints(&s_sec_ctxt, key)) \ 662 | { \ 663 | return NGX_ERROR; \ 664 | } \ 665 | \ 666 | LIST_ADD(*key ## s, key, *num_ ## key ## s); \ 667 | } \ 668 | \ 669 | return NGX_OK; 670 | 671 | #define SET_ARG_PARSE_FUNCTION_SIGNATURE(key) \ 672 | ngx_int_t \ 673 | sphx2_parse_ ## key ## _str( \ 674 | ngx_pool_t * pool, \ 675 | ngx_str_t * key ## _str, \ 676 | sphx2_ ## key ## _t ** key) \ 677 | 678 | #define SET_ARG_PARSE_FUNCTION_BODY(key) \ 679 | if(NGX_ERROR == sphx2_arg_parse_register(&s_main_ctxt, \ 680 | pool, (char*)key ## _str->data, s_ ## key ## _hints, s_set_delim)) \ 681 | { \ 682 | return(NGX_ERROR); \ 683 | } \ 684 | \ 685 | if(NULL == (*key = ngx_pcalloc(pool, sizeof(sphx2_ ## key ## _t)))) { \ 686 | return NGX_ERROR; \ 687 | } \ 688 | \ 689 | return(sphx2_arg_parse_whole_using_hints(&s_main_ctxt, *key)); 690 | 691 | 692 | DEFINE_ENUM_ARG_PARSE_FUNCTION(match_mode) 693 | DEFINE_ENUM_ARG_PARSE_FUNCTION(ranker) 694 | DEFINE_ENUM_ARG_PARSE_FUNCTION(sort_mode) 695 | DEFINE_ENUM_ARG_PARSE_FUNCTION(output_type) 696 | 697 | MULTI_ARG_PARSE_FUNCTION_SIGNATURE(weight) 698 | { 699 | sphx2_arg_parse_hint_t s_weight_hints[] = 700 | { 701 | { SPHX2_ARG_TYPE_STRING, NULL, 0 }, 702 | { SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 703 | { SPHX2_ARG_TYPE_NONE, NULL, 0 } 704 | }; 705 | 706 | MULTI_ARG_PARSE_FUNCTION_BODY(weight, s_key_val_delim) 707 | } 708 | 709 | MULTI_ARG_PARSE_FUNCTION_SIGNATURE(filter) 710 | { 711 | sphx2_arg_parse_hint_t s_filter_hints[] = 712 | { 713 | { SPHX2_ARG_TYPE_STRING, NULL, 0 }, 714 | { SPHX2_ARG_TYPE_ENUM, s_filter_exclude_strs, sz_filter_exclude_strs }, 715 | { SPHX2_ARG_TYPE_ENUM, s_filter_type_strs, sz_filter_type_strs }, 716 | { SPHX2_ARG_TYPE_INTEGER64, NULL, 0 }, 717 | { SPHX2_ARG_TYPE_INTEGER64, NULL, 0 }, 718 | { SPHX2_ARG_TYPE_NONE, NULL, 0 } 719 | }; 720 | 721 | 
MULTI_ARG_PARSE_FUNCTION_BODY(filter, s_set_delim) 722 | } 723 | 724 | static ngx_str_t s_dflt_sort = ngx_string("@group desc"); 725 | static ngx_str_t s_empty_str = ngx_null_string; 726 | 727 | SET_ARG_PARSE_FUNCTION_SIGNATURE(group) 728 | { 729 | sphx2_arg_parse_hint_t s_group_hints[] = 730 | { 731 | { SPHX2_ARG_TYPE_ENUM, s_group_type_strs, sz_group_type_strs }, 732 | { SPHX2_ARG_TYPE_STRING, NULL, 0 }, 733 | { SPHX2_ARG_TYPE_STRING, NULL, 0 }, 734 | { SPHX2_ARG_TYPE_STRING, NULL, 0 }, 735 | { SPHX2_ARG_TYPE_NONE, NULL, 0 } 736 | }; 737 | 738 | /* there must be a default group specification as per Sphinx 2.0 protocol */ 739 | if(NULL == group_str || 0 == group_str->len) { 740 | 741 | if(NULL == (*group = ngx_pcalloc(pool, sizeof(sphx2_group_t)))) { 742 | return NGX_ERROR; 743 | } 744 | 745 | (*group)->type = SPHX2_GROUPBY_DAY; 746 | (*group)->sort = &s_dflt_sort; 747 | (*group)->attr = &s_empty_str; 748 | (*group)->distinct = &s_empty_str; 749 | 750 | return(NGX_OK); 751 | } 752 | 753 | SET_ARG_PARSE_FUNCTION_BODY(group) 754 | } 755 | 756 | SET_ARG_PARSE_FUNCTION_SIGNATURE(geo) 757 | { 758 | sphx2_arg_parse_hint_t s_geo_hints[] = 759 | { 760 | { SPHX2_ARG_TYPE_STRING, NULL, 0 }, 761 | { SPHX2_ARG_TYPE_STRING, NULL, 0 }, 762 | { SPHX2_ARG_TYPE_FLOAT, NULL, 0 }, 763 | { SPHX2_ARG_TYPE_FLOAT, NULL, 0 }, 764 | { SPHX2_ARG_TYPE_NONE, NULL, 0 } 765 | }; 766 | 767 | assert(NULL != geo_str && 0 != geo_str->len); 768 | 769 | SET_ARG_PARSE_FUNCTION_BODY(geo) 770 | } 771 | 772 | MULTI_ARG_PARSE_FUNCTION_SIGNATURE(doc) 773 | { 774 | sphx2_arg_parse_hint_t s_doc_hints[] = 775 | { 776 | { SPHX2_ARG_TYPE_STRING, NULL, 0 }, 777 | { SPHX2_ARG_TYPE_NONE, NULL, 0 } 778 | }; 779 | 780 | MULTI_ARG_PARSE_FUNCTION_BODY(doc, s_no_delim) 781 | } 782 | 783 | SET_ARG_PARSE_FUNCTION_SIGNATURE(excerpt_opts) 784 | { 785 | sphx2_arg_parse_hint_t s_excerpt_opts_hints[] = 786 | { 787 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_STRING, NULL, 0 }, 788 | { SPHX2_ARG_TYPE_KEYVAL | 
SPHX2_ARG_TYPE_STRING, NULL, 0 }, 789 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_STRING, NULL, 0 }, 790 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 791 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 792 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 793 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 794 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 795 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 796 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 797 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 798 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 799 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 800 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 801 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 802 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_STRING, NULL, 0 }, 803 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 804 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_STRING, NULL, 0 }, 805 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 806 | { SPHX2_ARG_TYPE_KEYVAL | SPHX2_ARG_TYPE_INTEGER, NULL, 0 }, 807 | { SPHX2_ARG_TYPE_NONE, NULL, 0 } 808 | }; 809 | 810 | static ngx_str_t s_default_before_match = ngx_string(""); 811 | static ngx_str_t s_default_after_match = ngx_string(""); 812 | static ngx_str_t s_default_chunk_separator = ngx_string(" ... 
"); 813 | static uint32_t s_default_limit = 256; 814 | static uint32_t s_default_around = 5; 815 | static uint32_t s_default_start_passage_id = 1; 816 | static ngx_str_t s_default_html_strip_mode = ngx_string("index"); 817 | static ngx_str_t s_default_passage_boundary = ngx_string("none"); 818 | 819 | if(NULL == excerpt_opts_str || 0 == excerpt_opts_str->len) { 820 | 821 | if(NULL == (*excerpt_opts = ngx_pcalloc(pool, 822 | sizeof(sphx2_excerpt_opts_t)))) 823 | { 824 | return NGX_ERROR; 825 | } 826 | 827 | (*excerpt_opts)->before_match = &s_default_before_match; 828 | (*excerpt_opts)->after_match = &s_default_after_match; 829 | (*excerpt_opts)->chunk_separator = &s_default_chunk_separator; 830 | (*excerpt_opts)->limit = s_default_limit; 831 | (*excerpt_opts)->limit_passages = 0; 832 | (*excerpt_opts)->limit_words = 0; 833 | (*excerpt_opts)->around = s_default_around; 834 | (*excerpt_opts)->exact_phrase = 0; 835 | (*excerpt_opts)->single_passage = 0; 836 | (*excerpt_opts)->use_boundaries = 0; 837 | (*excerpt_opts)->weight_order = 0; 838 | (*excerpt_opts)->query_mode = 0; 839 | (*excerpt_opts)->force_all_words = 0; 840 | (*excerpt_opts)->start_passage_id = s_default_start_passage_id; 841 | (*excerpt_opts)->load_files = 0; 842 | (*excerpt_opts)->html_strip_mode = &s_default_html_strip_mode; 843 | (*excerpt_opts)->allow_empty = 0; 844 | (*excerpt_opts)->passage_boundary = &s_default_passage_boundary; 845 | (*excerpt_opts)->emit_zones = 0; 846 | (*excerpt_opts)->load_files_scattered = 0; 847 | 848 | return(NGX_OK); 849 | } 850 | 851 | SET_ARG_PARSE_FUNCTION_BODY(excerpt_opts) 852 | } 853 | 854 | void 855 | sphx2_create_opts_flag(sphx2_excerpt_opts_t * excerpt_opts) 856 | { 857 | excerpt_opts->opts_flag = 1; 858 | 859 | if ( excerpt_opts->exact_phrase != 0 ) excerpt_opts->opts_flag |= 2; 860 | if ( excerpt_opts->single_passage != 0 ) excerpt_opts->opts_flag |= 4; 861 | if ( excerpt_opts->use_boundaries != 0 ) excerpt_opts->opts_flag |= 8; 862 | if ( 
excerpt_opts->weight_order != 0 ) excerpt_opts->opts_flag |= 16; 863 | if ( excerpt_opts->query_mode != 0 ) excerpt_opts->opts_flag |= 32; 864 | if ( excerpt_opts->force_all_words != 0 ) excerpt_opts->opts_flag |= 64; 865 | if ( excerpt_opts->load_files != 0 ) excerpt_opts->opts_flag |= 128; 866 | if ( excerpt_opts->allow_empty != 0 ) excerpt_opts->opts_flag |= 256; 867 | if ( excerpt_opts->emit_zones != 0 ) excerpt_opts->opts_flag |= 512; 868 | if ( excerpt_opts->load_files_scattered != 0 ) excerpt_opts->opts_flag |= 1024; 869 | } 870 | --------------------------------------------------------------------------------