├── .gitignore ├── AUTHORS ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── build └── amd64 │ └── gichidan │ ├── build.sh │ └── mkdeb.json ├── cliface.go ├── data.go ├── doc.go ├── glide.yaml ├── main.go ├── parser.go ├── request.go ├── spider.go └── utils.go /.gitignore: -------------------------------------------------------------------------------- 1 | ############################################################################# 2 | #### This is an autogenerated .gitignore file #### 3 | ############################################################################# 4 | 5 | ###Go### 6 | 7 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 8 | *.o 9 | *. 10 | *.so 11 | 12 | # Folders 13 | _obj 14 | /bi 15 | /bin/* 16 | #_test 17 | 18 | # Architecture specific extensions/prefixes 19 | *.[568vq] 20 | [568vq].out 21 | 22 | *.cgo1.go 23 | *.cgo2.c 24 | _cgo_defun.c 25 | _cgo_gotypes.go 26 | _cgo_export.* 27 | 28 | _testmain.go 29 | 30 | *.exe 31 | *.test 32 | *.prof 33 | 34 | ###SublimeText### 35 | 36 | # cache files for sublime text 37 | *.tmlanguage.cache 38 | *.tmPreferences.cache 39 | *.stTheme.cache 40 | 41 | # workspace files are user-specific 42 | *.sublime-workspace 43 | 44 | # project files should be checked into the repository, unless a significant 45 | # proportion of contributors will probably not be using SublimeText 46 | *.sublime-project 47 | 48 | # sftp configuration file 49 | sftp-config.json 50 | 51 | ###Build objects### 52 | main 53 | /vendor 54 | /vendor/* 55 | .version.sh 56 | /notes/* 57 | /resources/* 58 | /resources 59 | /build/amd64/gichidan/*.deb 60 | /build/amd64/gichidan/gichidan 61 | README.e.md 62 | change.log 63 | build.sh 64 | glide.lock 65 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | hIMEI 2 | -------------------------------------------------------------------------------- 
/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog - gichidan 2 | 3 | ### 1.1.1 4 | 5 | __Changes__ 6 | 7 | - Converting output to JSON format added. 8 | 9 | 10 | 11 | 12 | __Contributors__ 13 | 14 | - hIMEI 15 | 16 | Released by hIMEI, Sat 10 Feb 2018 - 17 | [see the diff](https://github.com/hIMEI29A/gichidan/compare/1.0.1...1.1.1#diff) 18 | ______________ 19 | 20 | ### 1.0.1 21 | 22 | __Changes__ 23 | 24 | - Bug with "Please login before using filters" message fixed. Now it's 25 | being handled without panic 26 | 27 | 28 | 29 | 30 | __Contributors__ 31 | 32 | - hIMEI 33 | 34 | Released by hIMEI, Wed 07 Feb 2018 - 35 | [see the diff](https://github.com/hIMEI29A/gichidan/compare/1.0.0...1.0.1#diff) 36 | ______________ 37 | 38 | ### 1.0.0 39 | 40 | __Changes__ 41 | 42 | - Search with logical expressions implemented 43 | - Deb package's install added 44 | - Documentation and README.md rewrited 45 | - Many changes in build system and inner program logic invisible to users 46 | 47 | 48 | 49 | 50 | __Contributors__ 51 | 52 | - hIMEI 53 | 54 | Released by hIMEI, Tue 06 Feb 2018 - 55 | [see the diff](https://github.com/hIMEI29A/gichidan/compare/0.1.1...1.0.0#diff) 56 | ______________ 57 | 58 | ### 0.1.1 59 | 60 | __Changes__ 61 | 62 | - CHANGELOG.md changed 63 | - Error with reciever of parser.checkPage() fixed 64 | 65 | __Contributors__ 66 | 67 | - hIMEI 68 | 69 | Released by hIMEI, Sun 21 Jan 2018 - 70 | [see the diff](https://github.com/hIMEI29A/gichidan/compare/0.1.0...0.1.1#diff) 71 | ______________ 72 | 73 | ### 0.1.0 74 | 75 | __Changes__ 76 | 77 | - README.md updated 78 | - CHANGELOG.md created 79 | - Makefile removed 80 | - ShortInfo option added to CLI 81 | - All features: search requests, html parsing, results collecting, CLI, info output work fine. 
82 | - Save to file option fixed 83 | - Subcommands removed, now CLI with flags only 84 | - README.md added 85 | - All goroutines starts in main() only 86 | 87 | __Contributors__ 88 | 89 | - hIMEI 90 | 91 | Released by hIMEI, Sun 21 Jan 2018 - 92 | [see the diff](https://github.com/hIMEI29A/gichidan/compare/e57581e8c548fee66ffbff1b7dea693ee27a7b2d...0.1.0#diff) 93 | ______________ 94 | 95 | 96 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ########################################################################################### 2 | # WARNING!!! This script is for developping purposes only. Don't try to use it to install # 3 | # application. 
# 4 | ########################################################################################### 5 | 6 | BUILDDIR := $(CURDIR)/build/amd64/gichidan 7 | 8 | CC = go build --ldflags "-X main.VERSION=1.1.1" 9 | TARGET = gichidan 10 | ARTEFACT = cliface 11 | 12 | .PHONY: all install 13 | 14 | all: $(TARGET) 15 | 16 | $(TARGET): 17 | $(CC) 18 | 19 | install: $(TARGET) 20 | cp $(TARGET) $(BUILDDIR) 21 | rm -f $(TARGET) 22 | rm -f $(ARTEFACT) 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gichidan 2 | 3 | **gichidan** - command line wrapper with enhanced pentest features for 4 | (_onion link_) [**Ichidan**](http://ichidanv34wrx7m7.onion) - deep-web search engine. 5 | 6 | ███ █████ ███ █████ 7 | ░░░ ░░███ ░░░ ░░███ 8 | ███████ ████ ██████ ░███████ ████ ███████ ██████ ████████ 9 | ███░░███░░███ ███░░███ ░███░░███ ░░███ ███░░███ ░░░░░███ ░░███░░███ 10 | ░███ ░███ ░███ ░███ ░░░ ░███ ░███ ░███ ░███ ░███ ███████ ░███ ░███ 11 | ░███ ░███ ░███ ░███ ███ ░███ ░███ ░███ ░███ ░███ ███░░███ ░███ ░███ 12 | ░░███████ █████░░██████ ████ █████ █████░░████████░░████████ ████ █████ 13 | ░░░░░███░░░░░ ░░░░░░ ░░░░ ░░░░░ ░░░░░ ░░░░░░░░ ░░░░░░░░ ░░░░ ░░░░░ 14 | ███ ░███ ___onion secrets for console cowboys___ 15 | ░░██████ 16 | ░░░░░░ 17 | 18 | [![Go Report Card](https://goreportcard.com/badge/github.com/hIMEI29A/gichidan)](https://goreportcard.com/report/github.com/hIMEI29A/gichidan) [![GoDoc](https://godoc.org/github.com/hIMEI29A/gichidan?status.svg)](http://godoc.org/github.com/hIMEI29A/gichidan) [![Apache-2.0 License](https://img.shields.io/badge/license-Apache--2.0-red.svg)](LICENSE) 19 | 20 | Copyright 2017 hIMEI 21 | 22 | 23 | ## TOC 24 | - [About](#about) 25 | - [Features](#features) 26 | - [Version](#version) 27 | - [Install](#install) 28 | - [Usage](#usage) 29 | - [TODO](#todo) 30 | - [Contributing](#contributing) 31 | 32 | ## About 33 | 34 | 
**21.04.2018 UPD** Ichidan is dead last 3 month, so **Gichidan is deprecated** :( 35 | 36 | Forget about Tor Browser. Parse onion hosts from your console with **Gichidan** now. 37 | 38 | ##### License 39 | 40 | Apache-2.0 License 41 | 42 | ##### About Gichidan 43 | 44 | **Gichidan** is a CLI utility designed to collect information about deep-web hosts. 45 | 46 | ###### Dependencies 47 | 48 | github.com/antchfx/htmlquery 49 | github.com/antchfx/xpath 50 | github.com/hIMEI29A/gotorsocks 51 | golang.org/x/net/html 52 | 53 | ###### Short Ichidan's info 54 | 55 | Short info about Ichidan search engine from [here](https://www.cylance.com/en_us/blog/ichidan-a-search-engine-for-the-dark-web.html) 56 | 57 | > Ichidan is a type of Japanese verb which implies the first (“ichi”) time something is done. Now, Ichidan is also a search engine for looking up websites that are hosted through the Tor network, which may be the first time that's been done at this scale. 58 | 59 | > The search engine is less like Google and more like Shodan, in that it allows users to see technical information about .onion websites, including their connected network interfaces, such as TCP/IP ports. 60 | 61 | > Ichidan is a valuable resource for security researchers and law enforcement agencies who want to learn about what's happening on the Dark Web. 62 | 63 | ## Features 64 | 65 | **NEW!** Since version 1.0.0 search with logical expressions supported. 66 | See **Usage** section of this paper for details. 
67 | 68 | ## Version 69 | 70 | **v1.1.1** 71 | 72 | ## Install 73 | 74 | 75 | ##### Install standalone deb binary package 76 | 77 | **Requirements**: Linux Debian/Ubuntu amd64 78 | 79 | Download `deb` package and install it with `dpkg` or `gdebi`: 80 | 81 | wget https://github.com/hIMEI29A/gichidan/releases/download/1.1.1/\ 82 | gichidan-1.1.1-amd64.deb && sudo dpkg -i gichidan-1.1.1-amd64.deb 83 | 84 | wget https://github.com/hIMEI29A/gichidan/releases/download/1.1.1/\ 85 | gichidan-1.1.1-amd64.deb && sudo gdebi gichidan-1.1.1-amd64.deb 86 | 87 | Using of `apt-get install` will be implemented soon. Check the [release page](https://github.com/hIMEI29A/gichidan/releases)! 88 | 89 | ##### Install from source 90 | 91 | If you are Golang programmer, you may want to get source code and build app from it manually. Do next steps. 92 | 93 | Project uses `glide` to manage dependencies, so install it first 94 | 95 | ```sh 96 | curl https://glide.sh/get | sh 97 | ``` 98 | Clone repo, install deps, then install **Gichidan** 99 | 100 | ```sh 101 | mkdir -p $GOPATH/src/github.com/hIMEI29A/gichidan 102 | cd $GOPATH/src/github.com/hIMEI29A/gichidan 103 | git clone https://github.com/hIMEI29A/gichidan.git . 104 | glide install 105 | go install 106 | ``` 107 | 108 | Done. 
109 | 110 | ## Usage 111 | 112 | Gichidan's CLI options are: 113 | 114 | -b show ASCII banner 115 | -f string 116 | save results to file 117 | -h help message 118 | -j convert output to json 119 | -m Don't print GET request's messages (non-verbose output) 120 | -r string 121 | your search request to Ichidan 122 | -s print hosts urls only 123 | -v print current version 124 | 125 | Typical request to Ichidan looks like 126 | 127 | gichidan -r ichidan 128 | 129 | Output: 130 | 131 | Hosts found: 1 132 | Only one page 133 | 134 | Full info: 135 | 136 | http://ichidanv34wrx7m7.onion 137 | 2017-09-18 13:08:58 UTC 138 | tcpwrapped 139 | 80 140 | tcp 141 | tcpwrapped 142 | unknown VERSION 143 | 144 | http-headers: 145 | 146 | Server: nginx/1.10.3 (Ubuntu) 147 | Date: Mon, 18 Sep 2017 13:08:55 GMT 148 | Content-Type: text/html; charset=utf-8 149 | Connection: close 150 | X-Frame-Options: SAMEORIGIN 151 | X-XSS-Protection: 1; mode=block 152 | X-Content-Type-Options: nosniff 153 | ETag: W/"7e087af022204d46cb9b655936aa2915" 154 | Cache-Control: max-age=0, private, must-revalidate 155 | Set-Cookie: _ichidan_session=NXQ5NWc4ZmJiSHRnVVM2TDFmblVzcmo4NnY1aUdtUFZFY0VmcVpCTzJHUUx2T25XOUhKa0hMT2F4QS9LanVEMGNYeXlKaEwyNGFITjA1bjdsSE1PRnR3TTIrNEJuc3dtMS9JczM1c3haL0xsa0U5K3E4RytSbHNWakxYVTdhYmZ3dFdhRGhzTWR4SXdlT2VhMlhFRzNRPT0tLWpiOU9SMFJnbTFXeTJFamN6Q3FmU3c9PQ%3D%3D--6281f0c900799f334e5f8eb76589c89c38212d37; path=/; HttpOnly 156 | X-Request-Id: 1e002391-0137-41e1-83cd-acc6b69b5019 157 | X-Runtime: 0.005388 158 | 159 | (Request type: HEAD) 160 | 161 | http-server-header: 162 | nginx/1.10.3 (Ubuntu) 163 | http-title: 164 | Ichidan 165 | 166 | You may search by keywords (only to know what bad guys do): 167 | 168 | gichidan -r hacking 169 | 170 | gichidan -r paypal 171 | 172 | As well as by protocol, application name or service detail: 173 | 174 | gichidan -r ssh 175 | 176 | gichidan -r irc 177 | 178 | gichidan -r apache 179 | 180 | gichidan -r tcpwrapped 181 | 182 | gichidan -r prosody 183 | 
184 | gichidan -r raspbian 185 | 186 | To save results to a file, use the `-f` flag followed by the full file path: 187 | 188 | gichidan -r telnet -f ~/my_folder/telnet_search.txt 189 | 190 | If you don't want to see all details about the collected servers, use the `-s` ("short") option. When short info is combined with output to a file, the file will still contain all details: 191 | 192 | gichidan -r apache -s -f ~/my_folder/apache_search.txt 193 | 194 | To run the program in non-verbose ("mute") mode, use the `-m` flag. GET request messages will not be printed in this case: 195 | 196 | gichidan -r accounts -m 197 | 198 | To print an oldschool ASCII banner before crawling starts, use the `-b` flag: 199 | 200 | gichidan -r ejabberd -b 201 | 202 | Use the `-j` flag to convert output to JSON format. This feature cannot be used with the `-s` flag. If both `-j` and `-s` are provided, an error is reported. You may also save JSON to an output file with the `-f` option. 203 | 204 | gichidan -r CentOS -j -f 205 | 206 | If Ichidan cannot find anything for your request, the application will display an error: 207 | 208 | gichidan -r jdfhchgbverugbvcevcegrfvcew 209 | 210 | Output: 211 | 212 | 2013/01/20 16:12:12 Nothing found there! 213 | 214 | #### Logical expressions 215 | 216 | **NEW!** Since version 217 | v1.0.0 search with logical expressions is supported. Here are some simple rules for its usage: 218 | 219 | Expression MUST contain no more than two words (_yet_) with an operator between them and MUST NOT contain spaces between words and operator.
Operators are: 220 | 221 | AND "+" 222 | NOT "-" 223 | OR "=" 224 | 225 | Examples: 226 | 227 | It will show only results which satisfy both the "prosody" and "ejabberd" requests: 228 | 229 | gichidan -r prosody+ejabberd 230 | 231 | It will show only results of the "paypal" request which do not satisfy the "crime" request: 232 | 233 | gichidan -r paypal-crime 234 | 235 | It will show results of the "bbs" and "telnet" requests separately: 236 | 237 | gichidan -r bbs=telnet 238 | 239 | If the search engine cannot find anything for one of the words, the application will display an error: 240 | 241 | gichidan -r ssh+jdfhchgbverugbvcevcegrfvcew 242 | 243 | **Request MUST NOT contain spaces**. In case of a request such as `gichidan -r prosody client`, only the first word will be processed. Also, search by host url is not supported (in most cases) by the app (or by Ichidan). 244 | 245 | **NOTE:** The Tor network is not your vanilla Internet. It may be unstable or slow and there may be unexpected delays and errors. In this case you may try to simply restart the tor service on your machine: 246 | 247 | sudo service tor restart 248 | 249 | **NOTE:** Ichidan is not your vanilla Google, Yandex or Baidu. On its [page](http://ichidanv34wrx7m7.onion) you won't even find contact info or credits. In the first days of January 2018 it was absolutely unavailable! So there is no guarantee that you will receive any response! 250 | 251 | ## Contributing 252 | 253 | Feel free to contribute! 254 | 255 | #### Report a bug 256 | 257 | himei@tuta.io 258 | -------------------------------------------------------------------------------- /build/amd64/gichidan/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ########################################################################################### 4 | # WARNING!!! This script is for development purposes only. Don't try to use it to install # 5 | # application.
# 6 | ########################################################################################### 7 | 8 | # Functions 9 | ######################### 10 | 11 | # Prints usage message end exit 12 | usage() 13 | { 14 | echo "Usage : build.sh -v|--version [VERSION]" 15 | } 16 | 17 | # Variables 18 | ######################## 19 | 20 | FILE=*.deb 21 | REMOVE=gichidan 22 | 23 | # ClI args processing 24 | ######################## 25 | 26 | # if there is no args provided, print usage and exit 27 | if [ "$1" = "" ] 28 | then 29 | usage 30 | exit 31 | fi 32 | 33 | # args parsing 34 | while [ "$1" != "" ]; do 35 | case $1 in 36 | -v | --version ) 37 | VERSION="$2" 38 | shift 39 | shift 40 | echo "Version = $VERSION" 41 | ;; 42 | * ) 43 | usage 44 | exit 1 45 | esac 46 | done 47 | 48 | # Package building 49 | ####################### 50 | 51 | mkdeb build -version="$VERSION" mkdeb.json 52 | rm "$REMOVE" 53 | echo "Binary removed" 54 | -------------------------------------------------------------------------------- /build/amd64/gichidan/mkdeb.json: -------------------------------------------------------------------------------- 1 | { 2 | "package": "gichidan", 3 | "name": "gichidan", 4 | "architecture": "amd64", 5 | "maintainer": "hIMEI ", 6 | "copyright": "2018 hIMEI ", 7 | "description": "Gichidan - CLI wrapper for Ichidan deep-web search engine", 8 | "license": "Apache-2.0", 9 | "depends": [], 10 | "preDepends": [], 11 | "section": "default", 12 | "priority": "extra", 13 | "homepage": "https://github.com/hIMEI29A/gichidan", 14 | "preinst": "", 15 | "postinst": "", 16 | "prerm": "", 17 | "postrm": "", 18 | "autoPath": "deb-pkg", 19 | "files": { 20 | "gichidan": "/usr/local/bin/gichidan" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /cliface.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 hIMEI 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 
4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | /* 16 | * File cliface.go contains functions for iteracting with user's terminal. 17 | * Errors messages is also created here. 18 | */ 19 | 20 | package main 21 | 22 | import ( 23 | "fmt" 24 | "time" 25 | ) 26 | 27 | func makeErrString(errConst string) string { 28 | errString := BOLD + RED + errConst + RESET 29 | return errString 30 | } 31 | 32 | func makeMessage(messageConst string) string { 33 | message := BOLD + YEL + messageConst + RESET 34 | return message 35 | } 36 | 37 | func makeValMessage(value string) string { 38 | message := BOLD + CYN + value + RESET 39 | return message 40 | } 41 | 42 | func makeUrlMessage(url string) string { 43 | message := BOLD + GRN + url + RESET 44 | return message 45 | } 46 | 47 | func getTotalStats(bloodyRoots map[string]string, finalHosts []*Host, total int) string { 48 | stats := makeMessage(FOUND) + makeValMessage(iToa(total)) + "\n" 49 | 50 | for i, s := range bloodyRoots { 51 | stats += makeMessage(BYREQ) + makeValMessage(i) + ": " + makeValMessage(s) + "\n" 52 | } 53 | 54 | stats += makeValMessage(iToa(len(finalHosts))) + makeMessage(WILL) 55 | 56 | return stats 57 | } 58 | 59 | func pressAny() { 60 | var input string 61 | fmt.Println(makeMessage(PRESS)) 62 | fmt.Scanln(&input) 63 | } 64 | 65 | func banner() { 66 | fmt.Println(GRN, " ███ ", CYN, "█████ ", GRN, "███ ", CYN, "█████ ") 67 | time.Sleep(100 * time.Millisecond) 68 | fmt.Println(WHT, " v1.1.1", CYN, "░░░ ░░███ ░░░ ░░███ ", WHT, "© 
hIMEI") 69 | time.Sleep(100 * time.Millisecond) 70 | fmt.Println(CYN, " ███████ ████ ██████ ░███████ ████ ███████ ██████ ████████ ") 71 | time.Sleep(100 * time.Millisecond) 72 | fmt.Println(CYN, " ███░░███░░███ ███░░███ ░███░░███ ░░███ ███░░███ ░░░░░███ ░░███░░███ ") 73 | time.Sleep(100 * time.Millisecond) 74 | fmt.Println(CYN, "░███ ░███ ░███ ░███ ░░░ ░███ ░███ ░███ ░███ ░███ ███████ ░███ ░███ ") 75 | time.Sleep(100 * time.Millisecond) 76 | fmt.Println(CYN, "░███ ░███ ░███ ░███ ███ ░███ ░███ ░███ ░███ ░███ ███░░███ ░███ ░███ ") 77 | time.Sleep(100 * time.Millisecond) 78 | fmt.Println(CYN, "░░███████ █████░░██████ ████ █████ █████░░████████░░████████ ████ █████") 79 | time.Sleep(100 * time.Millisecond) 80 | fmt.Println(CYN, " ░░░░░███░░░░░ ░░░░░░ ░░░░ ░░░░░ ░░░░░ ░░░░░░░░ ░░░░░░░░ ░░░░ ░░░░░ ") 81 | time.Sleep(100 * time.Millisecond) 82 | fmt.Println(CYN, " ███ ░███ ", GRN, "___onion secrets for console cowboys___") 83 | time.Sleep(100 * time.Millisecond) 84 | fmt.Println(CYN, "░░██████") 85 | time.Sleep(100 * time.Millisecond) 86 | fmt.Println(CYN, "░░░░░░", RESET) 87 | } 88 | -------------------------------------------------------------------------------- /data.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 hIMEI 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 
15 | /** file data.go contains types and its methods for representing of collected data*/
16 | 
17 | package main
18 | 
19 | import (
20 | 	"encoding/json"
21 | 	"fmt"
22 | )
23 | 
24 | // Host is the basic data type: one found host together with its services
25 | type Host struct {
26 | 	// HostUrl is an url of host
27 | 	HostUrl string `json:"hosturl"`
28 | 	// AddDate is a date in which host was added to Ichidan index
29 | 	AddDate string `json:"adddate"`
30 | 	// PrimaryRequest is a request starter, e.g. search word
31 | 	PrimaryRequest string `json:"request"`
32 | 	// Services on host
33 | 	Services []*Service `json:"services"`
34 | }
35 | 
36 | // Service contains all info about one service found on a Host
37 | type Service struct {
38 | 	// Name is a service name: "OpenSSH" or "Apache httpd" for example
39 | 	Name string `json:"name"`
40 | 	// Port is a service listening port
41 | 	Port string `json:"port"`
42 | 	// Protocol is a service protocol
43 | 	Protocol string `json:"protocol"`
44 | 	// State is a service state: "http" or "ssh" for example
45 | 	State string `json:"state"`
46 | 	// Version is a service version if parsed
47 | 	Version string `json:"version"`
48 | 	// ServDetails is a <pre> tag's content of parsed page
49 | 	ServDetails string `json:"servdetails"`
50 | }
51 | 
52 | // NewService builds a Service from fields, read positionally as name, port,
53 | // protocol, state, version and details; it panics on fewer than six entries.
54 | func NewService(fields []string) *Service {
55 | 	name, port, proto, state, ver, det := fields[0], fields[1], fields[2], fields[3], fields[4], fields[5]
56 | 	return &Service{Name: name, Port: port, Protocol: proto, State: state, Version: ver, ServDetails: det}
57 | }
58 | 
59 | // NewHost builds a Host from fields, read positionally as host url, add date and
60 | // primary request, and attaches services; it panics on fewer than three entries.
61 | func NewHost(fields []string, services []*Service) *Host {
62 | 	url, added, primary := fields[0], fields[1], fields[2]
63 | 	return &Host{HostUrl: url, AddDate: added, PrimaryRequest: primary, Services: services}
64 | }
65 | 
66 | // String is a Stringer implementation for Service to output
67 | func (s *Service) String() string {
68 | 	return fmt.Sprintf("%s\n %s\n %s\n %s\n %s\n %s\n",
69 | 		s.Name, s.Port, s.Protocol, s.State, s.Version, s.ServDetails)
70 | }
71 | 
72 | // String is a Stringer implementation for Host to output
73 | func (h *Host) String() string {
74 | 	var servs string
75 | 
76 | 	for _, s := range h.Services {
77 | 		servs += s.String() + "\n"
78 | 	}
79 | 
80 | 	return fmt.Sprintf("%s\n %s\n %s\n", h.HostUrl, h.AddDate, servs)
81 | }
82 | 
83 | // hostToJson marshals the host with encoding/json and returns the bytes.
84 | // A marshalling error is handed to ErrFatal.
85 | func (h *Host) hostToJson() []byte {
86 | 	encoded, err := json.Marshal(h)
87 | 	ErrFatal(err)
88 | 	return encoded
89 | }
90 | 


--------------------------------------------------------------------------------
/doc.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2017 hIMEI
  2 | 
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | /*
 16 | Description
 17 | 
 18 | Package gichidan is a console utility that is a wrapper for the Ichidan deep-web search engine.
 19 | The purpose of the application is to collect information about hosts in Tor Network, e.g. in .onion
 20 | zone.
 21 | 
 22 | A little information about Ichidan. The search engine is less like Google and more like Shodan,
 23 | in that it allows users to see technical information about .onion websites, including software names,
 24 | services details, used protocols, connected network interfaces, such as TCP/IP ports.
 25 | 
 26 | Details
 27 | 
 28 | As Ichidan is located in .onion zone too, Gichidan uses package github.com/hIMEI29A/gotorsocks
 29 | for making requests through Tor proxy.
 30 | 
 31 | When app receives response from search engine, it asynchronously parses all results with Golang
 32 | concurrency model, even if result's pagination contains a lot of web pages.
 33 | 
 34 | Since v1.0.0 search with logical expressions is implemented. See details below.
 35 | 
 36 | Dependencies
 37 | 
 38 |     github.com/antchfx/htmlquery
 39 |     github.com/antchfx/xpath
 40 |     github.com/hIMEI29A/gotorsocks
 41 |     golang.org/x/net/html
 42 | 
 43 | Usage
 44 | 
 45 | Gichidan's CLI options are:
 46 | 
 47 |     gichidan
 48 |         -b    show ASCII banner
 49 |         -f string
 50 |               save results to file
 51 |         -h    help message
 52 |         -j    convert output to json
 53 |         -m    Don't print GET request's messages (non-verbose output)
 54 |         -r string
 55 |               your search request to Ichidan
 56 |         -s    print hosts urls only
 57 |         -v    print current version
 58 | 
 59 | You may search with the app by keyword, by software name, by network protocol and by many other things.
 60 | In most cases Gichidan cannot search by URL, because the search engine itself cannot. But you may try it.
 61 | 
 62 | Examples
 63 | 
 64 | To get usage help, type in console:
 65 | 
 66 |       gichidan -h
 67 | 
 68 | To get current app's version number (1.1.1), try
 69 | 
 70 |       gichidan -v
 71 | 
 72 | To get info about same Ichidan server, type
 73 | 
 74 |       gichidan -r ichidan
 75 | 
 76 | Output:
 77 | 
 78 |     Hosts found:   1
 79 |     Only one page
 80 | 
 81 |     Full info:
 82 | 
 83 |     http://ichidanv34wrx7m7.onion
 84 |       2017-09-18 13:08:58 UTC
 85 |       tcpwrapped
 86 |       80
 87 |       tcp
 88 |       tcpwrapped
 89 |       unknown VERSION
 90 | 
 91 |     http-headers:
 92 | 
 93 |         Server: nginx/1.10.3 (Ubuntu)
 94 |         Date: Mon, 18 Sep 2017 13:08:55 GMT
 95 |         Content-Type: text/html; charset=utf-8
 96 |         Connection: close
 97 |         X-Frame-Options: SAMEORIGIN
 98 |         X-XSS-Protection: 1; mode=block
 99 |         X-Content-Type-Options: nosniff
100 |         ETag: W/"7e087af022204d46cb9b655936aa2915"
101 |         Cache-Control: max-age=0, private, must-revalidate
102 |         Set-Cookie: _ichidan_session=NXQ5NWc4ZmJiSHRnVVM2TDFmblVzcmo4NnY1aUdtUFZFY0VmcVpCTz
103 |         JHUUx2T25XOUhKa0hMT2F4QS9LanVEMGNYeXlKaEwyNGFITjA1bjdsSE1PRnR3TTIrNEJuc3dtMS9JczM1c3haL0
104 |         xsa0U5K3E4RytSbHNWakxYVTdhYmZ3dFdhRGhzTWR4SXdlT2VhMlhFRzNRPT0tLWpiOU9SMFJnbTFXeTJFamN6Q3
105 |         FmU3c9PQ%3D%3D--6281f0c900799f334e5f8eb76589c89c38212d37; path=/; HttpOnly
106 |         X-Request-Id: 1e002391-0137-41e1-83cd-acc6b69b5019
107 |         X-Runtime: 0.005388
108 |         (Request type: HEAD)
109 | 
110 |     http-server-header:
111 |         nginx/1.10.3 (Ubuntu)
112 |     http-title:
113 |         Ichidan
114 | 
115 | To collect info about .onion sites which have "paypal" keyword in metatags, and save it to file, try:
116 | 
117 |     gichidan -r paypal -f ~/my_folder/paypal_search.txt
118 | 
119 | You may want to know about .onion Raspberry Pi hosts with Raspbian OS?
120 | 
121 |     gichidan -r raspbian
122 | 
123 | There are many private XMPP (Jabber) servers in the Tor network. To learn about them, type in console:
124 | 
125 |     gichidan -r xmpp
126 | 
127 | Or to collect info about Prosody XMPP servers only:
128 | 
129 |     gichidan -r prosody
130 | 
131 | To run program in non-verbose ("mute") mode, use `-m` flag. GET requests messages
132 | will not be printed in this case:
133 | 
134 |     gichidan -r accounts -m
135 | 
136 | To print oldschool ASCII banner before crawling start, use `-b` flag:
137 | 
138 |     gichidan -r ejabberd -b
139 | 
140 | If you don't want to see all the details about collected servers, use the -s ("short") option:
141 | 
142 |     gichidan -r ssh -s
143 | 
144 | When short info is combined with output to a file, the file will still contain all the details:
145 | 
146 |     gichidan -r apache -s -f ~/my_folder/paypal_search.txt
147 | 
148 | Use the `-j` flag to convert output to JSON format. This feature cannot be used with the `-s` flag:
149 | if both `-j` and `-s` are provided, an error is reported. You may also save the JSON to
150 | a file with the `-f` option.
151 | 
152 |     gichidan -r CentOS -j -f ~/my_folder/centos_search.json
153 | 
154 | Try to search by URL:
155 | 
156 |     gichidan -r facebookcorewwwi.onion
157 | 
158 | If Ichidan cannot find anything for your request, the application will display an error:
159 | 
160 |     gichidan -r jdfhchgbverugbvcevcegrfvcew
161 | 
162 | Output:
163 | 
164 |     2013/01/20 16:12:12 Nothing found there!
165 | 
166 | Logical operators (NEW)
167 | 
168 | Here are simple rules for their usage:
169 | 
170 | Expression MUST contain no more than two words (_yet_) with an operator between them and
171 | MUST NOT contain spaces between words and operator. Operators are:
172 | 
173 |     AND "+"
174 |     NOT "-"
175 |     OR  "="
176 | 
177 | Examples:
178 | 
179 | It will show only results which satisfy "prosody" and "ejabberd" requests both:
180 | 
181 |     gichidan -r prosody+ejabberd
182 | 
183 | It will show only results of the "paypal" request which do not satisfy the "crime" request:
184 | 
185 |     gichidan -r paypal-crime
186 | 
187 | It will show results of "bbs" and "telnet" requests separately:
188 | 
189 |     gichidan -r bbs=telnet
190 | 
191 | If the search engine cannot find anything for one of the words, the application will display an error:
192 | 
193 |     gichidan -r ssh+jdfhchgbverugbvcevcegrfvcew
194 | 
195 | Notes
196 | 
197 | Tor network may be slow. In case of long delay, restart Tor:
198 | 
199 |     sudo service tor restart
200 | */
201 | package main
202 | 


--------------------------------------------------------------------------------
/glide.yaml:
--------------------------------------------------------------------------------
1 | package: github.com/hIMEI29A/gichidan
2 | import:
3 | - package: github.com/hIMEI29A/gotorsocks
4 |   version: ^0.3.1
5 | - package: golang.org/x/net/html
6 | - package: github.com/antchfx/htmlquery
7 | - package: github.com/antchfx/xpath


--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2017 hIMEI
  2 | 
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | package main
 16 | 
 17 | import (
 18 | 	"errors"
 19 | 	"flag"
 20 | 	"fmt"
 21 | 	"os"
 22 | 	"path"
 23 | 	"sync"
 24 | 
 25 | 	"golang.org/x/net/html"
 26 | )
 27 | 
 28 | var (
 29 | 	requestFlag   = flag.String("r", "", "your search request to Ichidan")
 30 | 	shortInfoFlag = flag.Bool("s", false, "print hosts urls only")
 31 | 
 32 | 	// Save output to file: -f sets the path; main copies it to Filepath and the results to Parsed
 33 | 	outputFlag = flag.String("f", "", "save results to file")
 34 | 	Parsed     []*Host
 35 | 	Filepath   string
 36 | 
 37 | 	// JSON output
 38 | 	jsonFlag = flag.Bool("j", false, "convert output to json")
 39 | 
 40 | 	// version is the current app's version; -v prints it
 41 | 	version     = "1.1.1"
 42 | 	versionFlag = flag.Bool("v", false, "print current version")
 43 | 
 44 | 	// Print ASCII banner for oldschool guys
 45 | 	bannerFlag = flag.Bool("b", false, "show ASCII banner")
 46 | 
 47 | 	// Don't print GET request's messages
 48 | 	muteFlag = flag.Bool("m", false, "Don't print GET request's messages (non-verbose output)")
 49 | 
 50 | 	helpCmd = flag.Bool("h", false, "help message")
 51 | )
 52 | 
 53 | // toFile saves parsed to filepath: the String() form of each host followed by
 54 | // three newlines, or its JSON encoding when toJson is true. NOTEXIST is reported
 55 | // when the parent directory is missing and EXIST when the target file is already
 56 | // there; those errors and any open or write error are handed to ErrFatal.
 57 | func toFile(filepath string, toJson bool, parsed []*Host) {
 58 | 	if _, err := os.Stat(path.Dir(filepath)); os.IsNotExist(err) {
 59 | 		ErrFatal(errors.New(makeErrString(NOTEXIST)))
 60 | 	}
 61 | 
 62 | 	// os.Stat returns a nil error for an existing file and os.IsExist(nil) is
 63 | 	// false, so the presence of the file has to be tested with err == nil.
 64 | 	if _, err := os.Stat(filepath); err == nil {
 65 | 		ErrFatal(errors.New(makeErrString(EXIST)))
 66 | 	}
 67 | 
 68 | 	file, err := os.OpenFile(filepath, os.O_RDWR|os.O_CREATE, 0666)
 69 | 	ErrFatal(err)
 70 | 	defer file.Close()
 71 | 
 72 | 	for i := range parsed {
 73 | 		if toJson {
 74 | 			_, err = file.Write(parsed[i].hostToJson())
 75 | 		} else {
 76 | 			_, err = file.WriteString(parsed[i].String() + "\n\n\n")
 77 | 		}
 78 | 		// Check this write's own error instead of the stale one from OpenFile.
 79 | 		ErrFatal(err)
 80 | 	}
 81 | }
 82 | 
 83 | // main parses the CLI flags, starts one crawl per request string and then loops
 84 | // over the three channels until len(parsedHosts) reaches totalHosts. The collected
 85 | // hosts are passed through request.resultProvider, printed in full, short or JSON
 86 | // form and, when -f was given, written to that file with toFile.
 87 | func main() {
 88 | 	// Cli options parsing
 89 | 	flag.Parse()
 90 | 
 91 | 	if len(os.Args) == 1 {
 92 | 		flag.PrintDefaults()
 93 | 		os.Exit(1)
 94 | 	}
 95 | 
 96 | 	if *versionFlag {
 97 | 		fmt.Println(version)
 98 | 		os.Exit(1)
 99 | 	}
100 | 
101 | 	if *bannerFlag {
102 | 		banner()
103 | 	}
104 | 
105 | 	if *requestFlag == "" {
106 | 		flag.PrintDefaults()
107 | 		os.Exit(1)
108 | 	}
109 | 
110 | 	// -j and -s cannot be combined. Report it before any request is sent rather
111 | 	// than after the whole crawl has finished and its results were printed.
112 | 	if *jsonFlag && *shortInfoFlag {
113 | 		errStr := makeErrString(ERRFLAGS)
114 | 		newerr := errors.New(errStr)
115 | 		ErrFatal(newerr)
116 | 	}
117 | 
118 | 	if *outputFlag != "" {
119 | 		Filepath = *outputFlag
120 | 	}
121 | 
122 | 	var (
123 | 		parsedHosts []*Host
124 | 		rootHosts   = make(map[string]string)
125 | 		mutex       = &sync.Mutex{}
126 | 		// len(parsedHosts) must be less than totalHosts at the start of crawling
127 | 		totalHosts = 1
128 | 	)
129 | 
130 | 	// Channels
131 | 	var (
132 | 		channelBody = make(chan map[string]*html.Node, BUFFSIZE)
133 | 		chanUrls    = make(chan string, BUFFSIZE)
134 | 		chanHost    = make(chan []*Host, BUFFSIZE)
135 | 	)
136 | 
137 | 	// Actors
138 | 	var (
139 | 		s = NewSpider()
140 | 		p = NewParser()
141 | 	)
142 | 
143 | 	request := NewRequest(*requestFlag)
144 | 
145 | 	// Start crawling
146 | 	for _, req := range request.RequestStrings {
147 | 		go s.Crawl(req, channelBody)
148 | 	}
149 | 
150 | 	fmt.Println(makeMessage(WAIT))
151 | 	if *muteFlag {
152 | 		SLEEPER()
153 | 		fmt.Println(makeMessage(CONN))
154 | 	}
155 | 
156 | 	// Event loop: pages come in on channelBody, pagination links on chanUrls and
157 | 	// parsed hosts on chanHost.
158 | 	for len(parsedHosts) < totalHosts {
159 | 		select {
160 | 		case recievedNode := <-channelBody:
161 | 			primUrl, hostNode := unMap(recievedNode)
162 | 			if s.checkRoot(hostNode) == true {
163 | 				total := p.getTotal(hostNode)
164 | 				rootHosts[primUrl] = total
165 | 				// Get total number of all hosts. If here is first found root page,
166 | 				// totalHosts value must be decremented for happy loop exiting.
167 | 				if len(rootHosts) == 1 {
168 | 					totalHosts += (toInt(total) - 1)
169 | 				}
170 | 
171 | 				if len(rootHosts) > 1 {
172 | 					totalHosts += toInt(total)
173 | 				}
174 | 			}
175 | 
176 | 			go s.getPagination(hostNode, chanUrls)
177 | 			go p.parseOne(recievedNode, chanHost)
178 | 
179 | 		case newUrl := <-chanUrls:
180 | 			// Check if link was visited
181 | 			mutex.Lock()
182 | 			if s.HandledUrls[newUrl] == false {
183 | 				go s.Crawl(newUrl, channelBody)
184 | 				s.HandledUrls[newUrl] = true
185 | 				SLEEPER()
186 | 
187 | 				// verbose output
188 | 				if !*muteFlag {
189 | 					fmt.Println(makeValMessage(newUrl), makeMessage(PROCESSING))
190 | 				}
191 | 			}
192 | 			mutex.Unlock()
193 | 
194 | 		case newhosts := <-chanHost:
195 | 			// One message carries all hosts parsed from a single page.
196 | 			parsedHosts = append(parsedHosts, newhosts...)
197 | 		}
198 | 	}
199 | 
200 | 	finalHosts := request.resultProvider(parsedHosts)
201 | 
202 | 	fmt.Println(getTotalStats(rootHosts, finalHosts, totalHosts))
203 | 
204 | 	pressAny()
205 | 
206 | 	// Results output. If shortInfoFlag was parsed, only collected urls will be printed.
207 | 	if !*jsonFlag {
208 | 		if !*shortInfoFlag {
209 | 			fmt.Println(makeMessage(FULL))
210 | 			for i := range finalHosts {
211 | 				fmt.Println(makeUrlMessage(finalHosts[i].String()))
212 | 			}
213 | 		} else {
214 | 			fmt.Println(makeMessage(SHORT))
215 | 			for i := range finalHosts {
216 | 				fmt.Println(makeUrlMessage(finalHosts[i].HostUrl))
217 | 			}
218 | 		}
219 | 	} else {
220 | 		fmt.Println(makeMessage(FULL))
221 | 		for i := range finalHosts {
222 | 			os.Stdout.Write(finalHosts[i].hostToJson())
223 | 			fmt.Println()
224 | 		}
225 | 	}
226 | 
227 | 	// Save results to file if flag parsed
228 | 	if Filepath != "" {
229 | 		fmt.Println(makeMessage(SAVED), makeValMessage(Filepath))
230 | 		Parsed = finalHosts
231 | 		toFile(Filepath, *jsonFlag, Parsed)
232 | 	}
233 | 
234 | }
235 | 


--------------------------------------------------------------------------------
/parser.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2017 hIMEI
  2 | 
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | /** file parser.go contains data types and methods for HTML content parsing */
 16 | 
 17 | package main
 18 | 
 19 | import (
 20 | 	"strings"
 21 | 
 22 | 	"github.com/antchfx/htmlquery"
 23 | 	"golang.org/x/net/html"
 24 | )
 25 | 
 26 | // Parser groups the HTML/XPath extraction helpers of this file as methods.
 27 | // It has no fields.
 28 | type Parser struct{}
 29 | 
 30 | // NewParser returns a pointer to a fresh, empty Parser
 31 | func NewParser() *Parser {
 32 | 	var p Parser
 33 | 	return &p
 34 | }
 35 | 
 36 | // findEntry returns htmlquery.FindOne(node, entryexp); callers in this file treat a nil result as "no match"
 37 | func findEntry(node *html.Node, entryexp string) *html.Node {
 38 | 	return htmlquery.FindOne(node, entryexp)
 39 | }
 40 | 
 41 | // findEntrys returns the nodes htmlquery.Find yields for the expression entryexp applied to node
 42 | func findEntrys(node *html.Node, entryexp string) []*html.Node {
 43 | 	return htmlquery.Find(node, entryexp)
 44 | }
 45 | 
 46 | // getTag returns the inner text of the node that findEntry finds for tagexp under
 47 | // node, or "" when nothing matches (a nil node is never handed to InnerText).
 48 | func getTag(node *html.Node, tagexp string) string {
 49 | 	if found := findEntry(node, tagexp); found != nil {
 50 | 		return htmlquery.InnerText(found)
 51 | 	}
 52 | 	return ""
 53 | }
 50 | 
 51 | // getHref returns what htmlquery.SelectAttr reads from the HREF attribute of node
 52 | func getHref(node *html.Node) string {
 53 | 	return htmlquery.SelectAttr(node, HREF)
 54 | }
 55 | 
 56 | // unMap unpacks nodeMap into its key and value. An empty map yields "" and nil;
 57 | // with several entries the pair visited last by the range loop is returned.
 58 | func unMap(nodeMap map[string]*html.Node) (string, *html.Node) {
 59 | 	var (
 60 | 		url  string
 61 | 		page *html.Node
 62 | 	)
 63 | 	for k, v := range nodeMap {
 64 | 		url, page = k, v
 65 | 	}
 66 | 	return url, page
 67 | }
 68 | 
 69 | // checkPage reports whether node contains an element matching SEARCHRESULT:
 70 | // true for a root page, false for a host details page.
 71 | func (p *Parser) checkPage(node *html.Node) bool {
 72 | 	return findEntry(node, SEARCHRESULT) != nil
 73 | }
 80 | 
 81 | // parseOne turns the page carried by node into Hosts and sends them on chanHost as
 82 | // one slice. For each HOST element it reads the host fields, appends the trimmed
 83 | // map key as primary request, and fetches the page behind the DETAILS link with a
 84 | // blocking getContents call to collect the SERVICELONG entries as services.
 85 | func (p *Parser) parseOne(node map[string]*html.Node, chanHost chan []*Host) {
 86 | 	var hosts []*Host
 87 | 
 88 | 	url, hostNode := unMap(node)
 89 | 
 90 | 	for _, h := range p.getHosts(hostNode) {
 91 | 		fields := p.getHostFields(h)
 92 | 		fields = append(fields, trimString(url))
 93 | 
 94 | 		// NewRequest indexes the first byte of its argument, so an element without
 95 | 		// a details link (or with an empty href) must not reach it. Such a host is
 96 | 		// kept, just without services.
 97 | 		var detailslink string
 98 | 		if link := findEntry(h, DETAILS); link != nil {
 99 | 			detailslink = getHref(link)
100 | 		}
101 | 
102 | 		var services []*Service
103 | 		if detailslink != "" {
104 | 			req := NewRequest(detailslink)
105 | 			dnode := <-getContents(req.RequestStrings[0])
106 | 
107 | 			for _, srv := range findEntrys(dnode, SERVICELONG) {
108 | 				services = append(services, NewService(p.getServiceFields(srv)))
109 | 			}
110 | 		}
111 | 
112 | 		hosts = append(hosts, NewHost(fields, services))
113 | 	}
114 | 
115 | 	chanHost <- hosts
116 | }
117 | 
118 | // GetHostFields collects all data for Host struct creating
119 | // and returns it as []string
120 | func (p *Parser) getHostFields(node *html.Node) []string {
121 | 	var fields []string
122 | 
123 | 	hostUrl := getHref(findEntry(findEntry(findEntry(node, SUMMARY), ONION), LINK))
124 | 	fields = append(fields, hostUrl)
125 | 
126 | 	addDate := strings.TrimPrefix(getTag(findEntry(node, SUMMARY), SPAN), ADDED)
127 | 	fields = append(fields, addDate)
128 | 
129 | 	return fields
130 | }
131 | 
132 | // getTotal returns the text of the TOTAL element of root, passed through
133 | // trimString; main converts it with toInt to know how many hosts to wait for.
134 | func (p *Parser) getTotal(root *html.Node) string {
135 | 	text := getTag(root, TOTAL)
136 | 	return trimString(text)
137 | }
138 | 
139 | // GetServiceFields collects all data for Service struct creating
140 | // and returns it as []string
141 | func (p *Parser) getServiceFields(node *html.Node) []string {
142 | 	var fields []string
143 | 
144 | 	// Service name
145 | 	if findEntry(node, H3) != nil {
146 | 		fields = append(fields, trimString(getTag(node, H3)))
147 | 	} else {
148 | 		fields = append(fields, trimString(getTag(node, STATE)))
149 | 	}
150 | 
151 | 	// Service port
152 | 	fields = append(fields, trimString(getTag(node, PORT)))
153 | 	// Service protocol
154 | 	fields = append(fields, trimString(getTag(node, PROTO)))
155 | 	// Service state
156 | 	fields = append(fields, trimString(getTag(node, STATE)))
157 | 
158 | 	// Service version
159 | 	if findEntry(node, VERSION) != nil {
160 | 		fields = append(fields, trimString(getTag(node, VERSION)))
161 | 	} else {
162 | 		fields = append(fields, "unknown VERSION")
163 | 	}
164 | 
165 | 	// Service details, e.g. ServDetails
166 | 	pre := p.getPre(node)
167 | 	fields = append(fields, pre)
168 | 
169 | 	return fields
170 | }
171 | 
172 | //GetPre gets 
 tag's content of parsed page and trims all "\t" symbols from it.
173 | func (p *Parser) getPre(node *html.Node) string {
174 | 	pre := getTag(node, PRE)
175 | 	splitted := strings.Split(pre, "\n")
176 | 
177 | 	for i := range splitted {
178 | 		splitted[i] = trimString(strings.TrimPrefix(splitted[i], "\t"))
179 | 	}
180 | 
181 | 	newPre := strings.Join(splitted, "\n")
182 | 
183 | 	return newPre
184 | }
185 | 
186 | // getService returns the nodes under node matching SERVICE (<div>'s of class "service")
187 | func (p *Parser) getService(node *html.Node) []*html.Node {
188 | 	return findEntrys(node, SERVICE)
189 | }
190 | 
191 | // getHosts returns the nodes under node matching HOST; parseOne builds one Host from each
192 | func (p *Parser) getHosts(node *html.Node) []*html.Node {
193 | 	return findEntrys(node, HOST)
194 | }
195 | 


--------------------------------------------------------------------------------
/request.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2017 hIMEI
  2 | 
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //    http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
14 | 15 | /** file request.go contains data types and methods for http requests creating */ 16 | 17 | package main 18 | 19 | import ( 20 | "strings" 21 | ) 22 | 23 | // MakeLogicRequest parses given string for logical operators and create request with its if found 24 | func makeLogicRequest(req string) ([]string, []string) { 25 | var fullr, primr []string 26 | 27 | for _, l := range LOGIC { 28 | if strings.Contains(req, l) == true { 29 | splitted := strings.Split(req, l) 30 | 31 | for _, s := range splitted { 32 | reqString := "GET " + SEARCH + s + "\n" 33 | fullr = append(fullr, reqString) 34 | primr = append(primr, trimString(s)) 35 | } 36 | } 37 | } 38 | 39 | return fullr, primr 40 | } 41 | 42 | // TrimUrl takes string as argument and cuts everything but primary request 43 | func trimUrl(url string) string { 44 | splitted := strings.Split(url, "query") 45 | primary := trimString(strings.TrimPrefix(splitted[1], "=")) 46 | 47 | return primary 48 | } 49 | 50 | // Request is a data type for representing requests to search engine 51 | type Request struct { 52 | // RequestStrings contains prepared GET request(s) 53 | RequestStrings []string 54 | // PrimaryStrings contains searched words 55 | PrimaryStrings []string 56 | // Operator is a logical operator (NOT, OR and AND ) in case of logic request 57 | Operator string 58 | } 59 | 60 | // NewRequest creates instance of Request type 61 | func NewRequest(req string) *Request { 62 | request := &Request{} 63 | var fullRequest []string 64 | var primStrings []string 65 | var op string 66 | 67 | switch { 68 | // Case for program's inner logic 69 | case string(req[0]) == "/": 70 | reqString := "GET " + req + "\n" 71 | fullRequest = append(fullRequest, reqString) 72 | 73 | // Case for program's inner logic 74 | case string(req[0]) != "/" && 75 | string(req[0]) != NONE && 76 | strings.Contains(req, NONE) == true: 77 | 78 | splitted := strings.Split(req, NONE) 79 | reqString := "GET " + SEARCH + splitted[0] + "\n" 80 | fullRequest 
= append(fullRequest, reqString) 81 | 82 | // Search with operators 83 | case strings.Contains(req, AND) == true: 84 | fullr, primr := makeLogicRequest(req) 85 | fullRequest = fullr 86 | primStrings = primr 87 | 88 | op = AND 89 | 90 | // Search with operators 91 | case strings.Contains(req, OR) == true: 92 | fullr, primr := makeLogicRequest(req) 93 | fullRequest = fullr 94 | primStrings = primr 95 | 96 | op = OR 97 | 98 | // Search with operators 99 | case strings.Contains(req, NOT) == true: 100 | fullr, primr := makeLogicRequest(req) 101 | fullRequest = fullr 102 | primStrings = primr 103 | 104 | op = NOT 105 | 106 | // Default is a case without search operators, e.g. "gichidan -r ichidan" 107 | default: 108 | reqString := "GET " + SEARCH + req + "\n" 109 | fullRequest = append(fullRequest, reqString) 110 | primStrings = append(primStrings, req) 111 | } 112 | 113 | request.RequestStrings = fullRequest 114 | request.PrimaryStrings = primStrings 115 | request.Operator = op 116 | 117 | return request 118 | } 119 | 120 | // InRange checks if given slice of *Host contains given Host 121 | func (r *Request) inRange(host *Host, hosts []*Host) bool { 122 | check := false 123 | 124 | for i := range hosts { 125 | if hosts[i].HostUrl == host.HostUrl { 126 | check = true 127 | break 128 | } 129 | } 130 | 131 | return check 132 | } 133 | 134 | // SortResult sorts received hosts by its primary request's strings 135 | func (r *Request) splitResult(hosts []*Host) chan []*Host { 136 | // Channel for output 137 | chHosts := make(chan []*Host, 2) 138 | 139 | go func() { 140 | var ( 141 | hostsFirst []*Host 142 | hostsSec []*Host 143 | ) 144 | 145 | if len(r.PrimaryStrings) > 1 { 146 | for i := range hosts { 147 | if hosts[i].PrimaryRequest == r.PrimaryStrings[0] { 148 | hostsFirst = append(hostsFirst, hosts[i]) 149 | } 150 | 151 | if hosts[i].PrimaryRequest == r.PrimaryStrings[1] { 152 | hostsSec = append(hostsSec, hosts[i]) 153 | } 154 | } 155 | } 156 | if len(r.PrimaryStrings) == 
1 { 157 | hostsFirst = hosts 158 | } 159 | 160 | chHosts <- hostsFirst 161 | chHosts <- hostsSec 162 | }() 163 | 164 | return chHosts 165 | } 166 | 167 | // ResultProvider makes logical operations NOT, OR and AND against found hosts. 168 | func (r *Request) resultProvider(hosts []*Host) []*Host { 169 | var finalHosts []*Host 170 | 171 | chHosts := r.splitResult(hosts) 172 | //hostsFirst, hostsSec := r.sortResult(hosts) 173 | hostsFirst := <-chHosts 174 | hostsSec := <-chHosts 175 | 176 | if len(hostsSec) != 0 { 177 | switch { 178 | case r.Operator == AND: 179 | for i := range hostsFirst { 180 | if r.inRange(hostsFirst[i], hostsSec) == true { 181 | finalHosts = append(finalHosts, hostsFirst[i]) 182 | } 183 | } 184 | 185 | case r.Operator == NOT: 186 | for i := range hostsFirst { 187 | if r.inRange(hostsFirst[i], hostsSec) == false { 188 | finalHosts = append(finalHosts, hostsFirst[i]) 189 | } 190 | } 191 | 192 | case r.Operator == OR: 193 | for i := range hostsFirst { 194 | finalHosts = append(finalHosts, hostsFirst[i]) 195 | } 196 | 197 | for i := range hostsSec { 198 | finalHosts = append(finalHosts, hostsSec[i]) 199 | } 200 | } 201 | } else { 202 | finalHosts = hosts 203 | } 204 | 205 | return finalHosts 206 | } 207 | -------------------------------------------------------------------------------- /spider.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 hIMEI 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | /** file spider.go contains data types and its methods for web-crawling */ 16 | 17 | package main 18 | 19 | import ( 20 | "bufio" 21 | "errors" 22 | "fmt" 23 | "net" 24 | 25 | "github.com/antchfx/htmlquery" 26 | "github.com/hIMEI29A/gotorsocks" 27 | "golang.org/x/net/html" 28 | ) 29 | 30 | // Spider is an async urls handler 31 | type Spider struct { 32 | // Urls already being handled 33 | HandledUrls map[string]bool 34 | } 35 | 36 | // NewSpider is a constructor for Spider 37 | func NewSpider() *Spider { 38 | handled := make(map[string]bool) 39 | spider := &Spider{} 40 | spider.HandledUrls = handled 41 | 42 | return spider 43 | } 44 | 45 | // ConnectProvider provides connect to Ichidan with gotorsocks package 46 | func connectProvider() net.Conn { 47 | tor, err := gotorsocks.NewTorGate() 48 | ErrFatal(err) 49 | 50 | connect, err := tor.DialTor(ICHIDAN) 51 | ErrFatal(err) 52 | 53 | return connect 54 | } 55 | 56 | // GetContents makes request to Ichidan search engine and gets response body 57 | func getContents(request string) chan *html.Node { 58 | chanNode := make(chan *html.Node) 59 | go func() { 60 | connect := connectProvider() 61 | defer connect.Close() 62 | 63 | fmt.Fprintf(connect, request) 64 | resp := bufio.NewReader(connect) 65 | 66 | node, err := htmlquery.Parse(resp) 67 | ErrFatal(err) 68 | chanNode <- node 69 | }() 70 | 71 | return chanNode 72 | } 73 | 74 | // CheckResult controls empty search results 75 | func (s *Spider) checkResult(node *html.Node) bool { 76 | ch := true 77 | 78 | resultNoresult := findEntry(node, NORESULT) 79 | if resultNoresult != nil { 80 | ch = false 81 | } 82 | 83 | return ch 84 | } 85 | 86 | func (s *Spider) checkAuth(node *html.Node) bool { 87 | ch := true 88 | 89 | resultNoauth := findEntry(node, NOAUTH) 90 | if resultNoauth != nil { 91 | ch = false 92 | } 93 | 94 | return ch 95 | } 96 | 97 | // 
CheckRoot checks if given page is first or single page 98 | func (s *Spider) checkRoot(node *html.Node) bool { 99 | ch := false 100 | 101 | if s.checkSingle(node) == false || getTag(findEntry(node, PAGINATION), CURRENT) == "1" { 102 | ch = true 103 | } 104 | 105 | return ch 106 | } 107 | 108 | // CheckDone checks last pagination's page 109 | func (s *Spider) checkDone(node *html.Node) bool { 110 | ch := false 111 | 112 | pagination := findEntry(node, PAGINATION) 113 | 114 | if findEntry(pagination, DISABLED) != nil { 115 | ch = true 116 | } 117 | 118 | return ch 119 | } 120 | 121 | // CheckSingle checks if given page is single (have not pagination) 122 | func (s *Spider) checkSingle(node *html.Node) bool { 123 | ch := true 124 | 125 | if findEntry(node, PAGINATION) == nil { 126 | ch = false 127 | } 128 | 129 | return ch 130 | } 131 | 132 | // Crawl is a async crawler that takes request as first argument, gets it content 133 | // and sends it to channel given as second argument 134 | func (s *Spider) Crawl(url string, channelBody chan map[string]*html.Node) { 135 | bodyMap := make(map[string]*html.Node) 136 | chanNode := getContents(url) 137 | body := <-chanNode 138 | 139 | if s.checkResult(body) == false { 140 | errString := makeErrString(NOTHING) 141 | err := errors.New(errString) 142 | ErrFatal(err) 143 | } 144 | 145 | if s.checkAuth(body) == false { 146 | errString := makeErrString(ERRAUTH) 147 | err := errors.New(errString) 148 | ErrFatal(err) 149 | } 150 | 151 | bodyMap[trimString(trimUrl(url))] = body 152 | 153 | channelBody <- bodyMap 154 | 155 | return 156 | } 157 | 158 | // GetPagination finds pagination
and gets all links from it. 159 | // Also it checks for single-paged result 160 | func (s *Spider) getPagination(node *html.Node, chanUrls chan string) { 161 | pagination := findEntry(node, PAGINATION) 162 | 163 | if pagination != nil { 164 | current := toInt(getTag(pagination, CURRENT)) 165 | hrefs := findEntrys(pagination, LINK) 166 | 167 | for _, newtag := range hrefs { 168 | if htmlquery.InnerText(newtag) != PREVIOUS && 169 | htmlquery.InnerText(newtag) != NEXT && 170 | toInt(htmlquery.InnerText(newtag)) > current { 171 | req := NewRequest(getHref(newtag)) 172 | 173 | chanUrls <- req.RequestStrings[0] 174 | } 175 | } 176 | } else { 177 | fmt.Println(makeMessage(ONLYONE)) 178 | } 179 | 180 | return 181 | } 182 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 hIMEI 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | /** file utils.go contains constants, global vars and some helper functions*/ 16 | 17 | package main 18 | 19 | import ( 20 | "log" 21 | "math/rand" 22 | "strconv" 23 | "strings" 24 | "time" 25 | ) 26 | 27 | // Size of buffered channel 28 | const BUFFSIZE int = 1200 29 | 30 | // Output colorizing 31 | const ( 32 | RED string = "\x1B[31m" 33 | GRN = "\x1B[32m" 34 | YEL = "\x1B[33m" 35 | BLU = "\x1B[34m" 36 | CYN = "\x1B[36m" 37 | WHT = "\x1B[97m" 38 | RESET = "\x1B[0m" 39 | BOLD = "\x1B[1m" 40 | ) 41 | 42 | // Connect to search engine 43 | const ( 44 | ICHIDAN string = "ichidanv34wrx7m7.onion:80" 45 | SEARCH = "/search?query=" 46 | AUTH = "/users/sign_in" 47 | ) 48 | 49 | // Html parsing and logic expressions 50 | const ( 51 | ADDED string = "Added on " 52 | LONGFORM = "2017-09-09 01:30:35 UTC" 53 | PRE = "//pre" 54 | SPAN = "//span" 55 | LINK = "//a" 56 | HREF = "href" 57 | H2 = "//h2" 58 | H3 = "//h3" 59 | VERSION = "//small" 60 | NONE = " " 61 | CURRENT = "//em[@class='current']" 62 | DISABLED = "//span[@class='next_page disabled']" 63 | SEARCHRESULT = "//div[@id='search-results']" 64 | PAGINATION = "//div[@class='pagination']" 65 | DETAILS = "//a[@class='details']" 66 | SUMMARY = "//div[@class='search-result-summary col-xs-4']" 67 | ROW = "//div[@class='row']" 68 | ONION = "//div[@class='onion']" 69 | TOTAL = "//div[@class='bignumber']" 70 | SERVICE = "//div[@class='service']" 71 | SERVICES = "//div[@class='services']" 72 | SERVICELONG = "//li[@class='service service-long']" 73 | SERVICEDETAILS = "//div[@class='service-details col-sm-2']" 74 | HOST = "//div[@class='search-result row-fluid']" 75 | NORESULT = "//div[@class='msg alert alert-info']" 76 | NOAUTH = "//div[@class='alert alert-dismissible alert-danger']" 77 | RESULT = "//div[@class='col-sm-9']" 78 | PORT = "//div[@class='port']" 79 | PROTO = "//div[@class='protocol']" 80 | STATE = "//div[@class='state']" 81 | PRIMARY = "//div[@class='span8 name']" 82 | PREVIOUS = "← Previous" 83 | NEXT = "Next 
→" 84 | AND = "+" 85 | OR = "=" 86 | NOT = "-" 87 | ) 88 | 89 | // Console messages 90 | const ( 91 | NOTHING string = "Nothing found there, Neo!" 92 | ERRAUTH = "This does not work yet" 93 | ERRFLAGS = "These flags cannot be used together" 94 | ONLYONE = "Only one page" 95 | UNKNOWN = "unknown version" 96 | WAIT = "Waiting for connect..." 97 | PARSING = "All data downloaded. Waiting for parsing" 98 | PROCESSING = "in processing" 99 | RECEIVED = "Respose received" 100 | NOTEXIST = "Given path does not exist" 101 | EXIST = "File already exist, we'll not rewrite it " 102 | FOUND = "Total hosts found: " 103 | BYREQ = "by request " 104 | PRESS = "Press Enter to see details" 105 | FULL = "Full info: " 106 | SHORT = "Short info" 107 | SAVED = "Saved to" 108 | WILL = " will be printed" 109 | CONN = "Connected" 110 | ) 111 | 112 | // LOGIC is a operators for making logic requests 113 | var LOGIC = []string{ 114 | AND, 115 | OR, 116 | NOT, 117 | } 118 | 119 | var SLEEPER = sleeper() 120 | 121 | // Toint converts string to int and handle errors 122 | func toInt(str string) int { 123 | intCount, err := strconv.Atoi(str) 124 | ErrFatal(err) 125 | 126 | return intCount 127 | } 128 | 129 | // iToa converts int to string and handle errors 130 | func iToa(i int) string { 131 | str := strconv.Itoa(i) 132 | 133 | return str 134 | } 135 | 136 | // ErrFatal is the basic errors handler 137 | func ErrFatal(err error) { 138 | if err != nil { 139 | log.Fatal(err) 140 | } 141 | } 142 | 143 | // TrimString trims trailing and leading spaces from string 144 | func trimString(str string) string { 145 | return strings.TrimSpace(str) 146 | } 147 | 148 | // Sleeper is a closure which calls time.Sleep with random time 149 | // range between 300 and 359 milliseconds. 
It used to avoid server overloading 150 | func sleeper() func() { 151 | return func() { 152 | s := rand.NewSource(time.Now().UnixNano()) 153 | r := rand.New(s) 154 | p := time.Duration(300 + r.Intn(59)) 155 | time.Sleep(p * time.Millisecond) 156 | } 157 | } 158 | --------------------------------------------------------------------------------