├── .gitignore ├── LICENSE ├── README.md ├── doc ├── Makefile └── source │ ├── README.rst │ ├── code │ └── API.rst │ ├── conf.py │ └── index.rst ├── examples └── by_country.py ├── setup.py └── urlquery ├── __init__.py └── api.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build 3 | urlquery/apikey.py 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, 2014 Raphaël Vinot 2 | Copyright (c) 2013, 2014 CIRCL - Computer Incident Response Center Luxembourg 3 | (c/o smile, security made in Lëtzebuerg, Groupement 4 | d'Intérêt Economique) 5 | 6 | All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without modification, 9 | are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, 12 | this list of conditions and the following disclaimer. 13 | 2. Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 25 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 26 | OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Readme 2 | ====== 3 | 4 | API to access [urlquery](http://urlquery.net/index.php). 5 | 6 | 7 | Intro to the API 8 | ================ 9 | 10 | The API uses JSON requests and responses. A json string is built with the 11 | parameters and the function to call and POSTed over HTTPS to the API URL. 12 | 13 | The functions to call is put in the "method" key within the JSON string, the 14 | rest of the parameters to the functions has their respective parameter name as 15 | key. The "method" key is required for all API calls. 16 | 17 | The API key isn't always required for API calls, but by not using the key it 18 | will often severely reduce the amount of data that are returned. The data 19 | returned is determined by the key and its associated permissions. 20 | 21 | 22 | Note: The access of the default key (no key) is very limited, even more than 23 | whatis accessible on the public site. 24 | 25 | API calls 26 | ========= 27 | 28 | Common Objects in response structures: 29 | 30 | * IP:: 31 | 32 | { 33 | "addr" : string, 34 | "cc" : string, Country code (NO, UK, SE, DK) etc.. 35 | "country": string, 36 | "asn" : int, ASN number 37 | "as" : string AS string (full name) 38 | } 39 | 40 | * URL:: 41 | 42 | { 43 | "addr" : string, 44 | "fqdn" : string, 45 | "domain": string, 46 | "tld" : string, 47 | "ip" : IP IP object 48 | } 49 | 50 | * SETTINGS:: 51 | 52 | { 53 | "useragent" : string, UserAgent used 54 | "referer" : string, 55 | "pool" : string, Pool of exit nodes used 56 | "access_level": string 57 | } 58 | 59 | * BINBLOB:: 60 | 61 | { 62 | "base64_data" : string, Base64 encoded data 63 | "media_type" : string mime type 64 | } 65 | 66 | 67 | * Example:: 68 | 69 | { 70 | "url": 71 | { 72 | "addr": "www.youtube.com/watch?v=oHg5SJYRHA0", 73 | "ip": 74 | { 75 | "addr": "213.155.151.148", 76 | "cc": "IE", 77 | "country": "Ireland", 78 | "asn": 1299, 79 | "as": "AS1299 TeliaSonera International Carrier" 80 | }, 81 | "fqdn": "www.youtube.com", 82 | "domain": "youtube.com", 83 | "tld": "com" 84 | } 85 | } 86 | 87 | All responses from the API includes a response object. Which holds the status 88 | of the API call. This is called "_response_" 89 | 90 | * RESPONSE:: 91 | 92 | { 93 | "status" : string, "ok" or "error" 94 | "error" : string Error string if applicable 95 | } 96 | 97 | API Key 98 | ======= 99 | 100 | If you have an API Key, put it in apikey.py with the variable name 'key'. 101 | 102 | Gzip 103 | ==== 104 | 105 | To get the responses of the api gzip'ed, change 'gzip_default' to True. 106 | 107 | Dependencies 108 | ============ 109 | 110 | Hard: 111 | 112 | * requests: https://github.com/kennethreitz/Requests 113 | * dateutil 114 | 115 | Optional: 116 | 117 | * jsonsimple 118 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/URLQueryPythonAPI.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/URLQueryPythonAPI.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/URLQueryPythonAPI" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/URLQueryPythonAPI" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /doc/source/README.rst: -------------------------------------------------------------------------------- 1 | ../../README.md -------------------------------------------------------------------------------- /doc/source/code/API.rst: -------------------------------------------------------------------------------- 1 | *** 2 | API 3 | *** 4 | 5 | .. automodule:: urlquery.api 6 | :members: 7 | 8 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # URLQuery Python API documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Nov 19 09:49:06 2012. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | sys.path.insert(0, os.path.abspath('../../')) 21 | 22 | # -- General configuration ----------------------------------------------------- 23 | 24 | # If your documentation needs a minimal Sphinx version, state it here. 25 | #needs_sphinx = '1.0' 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 29 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.pngmath', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode'] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # The suffix of source filenames. 35 | source_suffix = '.rst' 36 | 37 | # The encoding of source files. 38 | #source_encoding = 'utf-8-sig' 39 | 40 | # The master toctree document. 41 | master_doc = 'index' 42 | 43 | # General information about the project. 44 | project = u'URLQuery Python API' 45 | copyright = u'2012, CIRCL' 46 | 47 | # The version info for the project you're documenting, acts as replacement for 48 | # |version| and |release|, also used in various other places throughout the 49 | # built documents. 50 | # 51 | # The short X.Y version. 52 | version = '0.1' 53 | # The full version, including alpha/beta/rc tags. 54 | release = '0.1' 55 | 56 | # The language for content autogenerated by Sphinx. Refer to documentation 57 | # for a list of supported languages. 58 | #language = None 59 | 60 | # There are two options for replacing |today|: either, you set today to some 61 | # non-false value, then it is used: 62 | #today = '' 63 | # Else, today_fmt is used as the format for a strftime call. 64 | #today_fmt = '%B %d, %Y' 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | exclude_patterns = [] 69 | 70 | # The reST default role (used for this markup: `text`) to use for all documents. 71 | #default_role = None 72 | 73 | # If true, '()' will be appended to :func: etc. cross-reference text. 74 | #add_function_parentheses = True 75 | 76 | # If true, the current module name will be prepended to all description 77 | # unit titles (such as .. function::). 78 | #add_module_names = True 79 | 80 | # If true, sectionauthor and moduleauthor directives will be shown in the 81 | # output. They are ignored by default. 82 | #show_authors = False 83 | 84 | # The name of the Pygments (syntax highlighting) style to use. 85 | pygments_style = 'sphinx' 86 | 87 | # A list of ignored prefixes for module index sorting. 88 | #modindex_common_prefix = [] 89 | 90 | 91 | # -- Options for HTML output --------------------------------------------------- 92 | 93 | # The theme to use for HTML and HTML Help pages. See the documentation for 94 | # a list of builtin themes. 95 | html_theme = 'default' 96 | 97 | # Theme options are theme-specific and customize the look and feel of a theme 98 | # further. For a list of options available for each theme, see the 99 | # documentation. 100 | #html_theme_options = {} 101 | 102 | # Add any paths that contain custom themes here, relative to this directory. 103 | #html_theme_path = [] 104 | 105 | # The name for this set of Sphinx documents. If None, it defaults to 106 | # " v documentation". 107 | #html_title = None 108 | 109 | # A shorter title for the navigation bar. Default is the same as html_title. 110 | #html_short_title = None 111 | 112 | # The name of an image file (relative to this directory) to place at the top 113 | # of the sidebar. 114 | #html_logo = None 115 | 116 | # The name of an image file (within the static path) to use as favicon of the 117 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 118 | # pixels large. 119 | #html_favicon = None 120 | 121 | # Add any paths that contain custom static files (such as style sheets) here, 122 | # relative to this directory. They are copied after the builtin static files, 123 | # so a file named "default.css" will overwrite the builtin "default.css". 124 | html_static_path = ['_static'] 125 | 126 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 127 | # using the given strftime format. 128 | #html_last_updated_fmt = '%b %d, %Y' 129 | 130 | # If true, SmartyPants will be used to convert quotes and dashes to 131 | # typographically correct entities. 132 | #html_use_smartypants = True 133 | 134 | # Custom sidebar templates, maps document names to template names. 135 | #html_sidebars = {} 136 | 137 | # Additional templates that should be rendered to pages, maps page names to 138 | # template names. 139 | #html_additional_pages = {} 140 | 141 | # If false, no module index is generated. 142 | #html_domain_indices = True 143 | 144 | # If false, no index is generated. 145 | #html_use_index = True 146 | 147 | # If true, the index is split into individual pages for each letter. 148 | #html_split_index = False 149 | 150 | # If true, links to the reST sources are added to the pages. 151 | #html_show_sourcelink = True 152 | 153 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 154 | #html_show_sphinx = True 155 | 156 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 157 | #html_show_copyright = True 158 | 159 | # If true, an OpenSearch description file will be output, and all pages will 160 | # contain a tag referring to it. The value of this option must be the 161 | # base URL from which the finished HTML is served. 162 | #html_use_opensearch = '' 163 | 164 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 165 | #html_file_suffix = None 166 | 167 | # Output file base name for HTML help builder. 168 | htmlhelp_basename = 'URLQueryPythonAPIdoc' 169 | 170 | 171 | # -- Options for LaTeX output -------------------------------------------------- 172 | 173 | latex_elements = { 174 | # The paper size ('letterpaper' or 'a4paper'). 175 | #'papersize': 'letterpaper', 176 | 177 | # The font size ('10pt', '11pt' or '12pt'). 178 | #'pointsize': '10pt', 179 | 180 | # Additional stuff for the LaTeX preamble. 181 | #'preamble': '', 182 | } 183 | 184 | # Grouping the document tree into LaTeX files. List of tuples 185 | # (source start file, target name, title, author, documentclass [howto/manual]). 186 | latex_documents = [ 187 | ('index', 'URLQueryPythonAPI.tex', u'URLQuery Python API Documentation', 188 | u'CIRCL', 'manual'), 189 | ] 190 | 191 | # The name of an image file (relative to this directory) to place at the top of 192 | # the title page. 193 | #latex_logo = None 194 | 195 | # For "manual" documents, if this is true, then toplevel headings are parts, 196 | # not chapters. 197 | #latex_use_parts = False 198 | 199 | # If true, show page references after internal links. 200 | #latex_show_pagerefs = False 201 | 202 | # If true, show URL addresses after external links. 203 | #latex_show_urls = False 204 | 205 | # Documents to append as an appendix to all manuals. 206 | #latex_appendices = [] 207 | 208 | # If false, no module index is generated. 209 | #latex_domain_indices = True 210 | 211 | 212 | # -- Options for manual page output -------------------------------------------- 213 | 214 | # One entry per manual page. List of tuples 215 | # (source start file, name, description, authors, manual section). 216 | man_pages = [ 217 | ('index', 'urlquerypythonapi', u'URLQuery Python API Documentation', 218 | [u'CIRCL'], 1) 219 | ] 220 | 221 | # If true, show URL addresses after external links. 222 | #man_show_urls = False 223 | 224 | 225 | # -- Options for Texinfo output ------------------------------------------------ 226 | 227 | # Grouping the document tree into Texinfo files. List of tuples 228 | # (source start file, target name, title, author, 229 | # dir menu entry, description, category) 230 | texinfo_documents = [ 231 | ('index', 'URLQueryPythonAPI', u'URLQuery Python API Documentation', 232 | u'CIRCL', 'URLQueryPythonAPI', 'One line description of project.', 233 | 'Miscellaneous'), 234 | ] 235 | 236 | # Documents to append as an appendix to all manuals. 237 | #texinfo_appendices = [] 238 | 239 | # If false, no module index is generated. 240 | #texinfo_domain_indices = True 241 | 242 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 243 | #texinfo_show_urls = 'footnote' 244 | 245 | 246 | # -- Options for Epub output --------------------------------------------------- 247 | 248 | # Bibliographic Dublin Core info. 249 | epub_title = u'URLQuery Python API' 250 | epub_author = u'CIRCL' 251 | epub_publisher = u'CIRCL' 252 | epub_copyright = u'2012, CIRCL' 253 | 254 | # The language of the text. It defaults to the language option 255 | # or en if the language is not set. 256 | #epub_language = '' 257 | 258 | # The scheme of the identifier. Typical schemes are ISBN or URL. 259 | #epub_scheme = '' 260 | 261 | # The unique identifier of the text. This can be a ISBN number 262 | # or the project homepage. 263 | #epub_identifier = '' 264 | 265 | # A unique identification for the text. 266 | #epub_uid = '' 267 | 268 | # A tuple containing the cover image and cover page html template filenames. 269 | #epub_cover = () 270 | 271 | # HTML files that should be inserted before the pages created by sphinx. 272 | # The format is a list of tuples containing the path and title. 273 | #epub_pre_files = [] 274 | 275 | # HTML files shat should be inserted after the pages created by sphinx. 276 | # The format is a list of tuples containing the path and title. 277 | #epub_post_files = [] 278 | 279 | # A list of files that should not be packed into the epub file. 280 | #epub_exclude_files = [] 281 | 282 | # The depth of the table of contents in toc.ncx. 283 | #epub_tocdepth = 3 284 | 285 | # Allow duplicate toc entries. 286 | #epub_tocdup = True 287 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. URLQuery Python API documentation master file, created by 2 | sphinx-quickstart on Mon Nov 19 09:49:06 2012. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to URLQuery Python API's documentation! 7 | =============================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | README 15 | 16 | 17 | Code Documentation 18 | ================== 19 | 20 | .. toctree:: 21 | :maxdepth: 1 22 | 23 | code/API 24 | 25 | 26 | 27 | Indices and tables 28 | ================== 29 | 30 | * :ref:`genindex` 31 | * :ref:`modindex` 32 | * :ref:`search` 33 | 34 | -------------------------------------------------------------------------------- /examples/by_country.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | import re 6 | import urlquery 7 | import json 8 | import smtplib 9 | from email.mime.text import MIMEText 10 | import time 11 | import datetime 12 | 13 | c = 'Luxembourg' 14 | cc = 'LU' 15 | tld = 'lu' 16 | sender = 'urlquery@example.com' 17 | to = 'dest@example.com' 18 | smtp_server = 'smtp_server' 19 | 20 | def get_country(): 21 | feed = urlquery.urlfeed() 22 | entries = [] 23 | for url in feed.get('feed'): 24 | if type(url) != type({}): 25 | continue 26 | if url.get('tld') == tld or url.get('ip').get('cc') == cc 27 | or url.get('ip').get('country') == c: 28 | entries.append(url) 29 | return entries 30 | 31 | def prepare_mail(entry): 32 | to_return = {} 33 | to_return['subject'] = \ 34 | 'UrlQuery report for {}'.format(entry.get('ip').get('addr')) 35 | to_return['body'] = json.dumps(entry, sort_keys=True, indent=4) 36 | reports = urlquery.search(entry.get('ip').get('addr'), 37 | urlquery_from = datetime.datetime.now() - datetime.timedelta(hours=1)) 38 | # FIXME: the output of a search is undefined 39 | if reports is None: 40 | response = urlquery.submit(entry['url']) 41 | queue_id = response.get('queue_id') 42 | report_id = response.get('report_id') 43 | i = 0 44 | while report_id is None: 45 | print 'Waiting for', entry.get('url').get('addr') 46 | time.sleep(30) 47 | response = urlquery.queue_status(queue_id) 48 | report_id = response.get('report_id') 49 | i += 1 50 | if i >= 5: 51 | return to_return 52 | full_report = urlquery.report(status['report_id'], include_details=True) 53 | to_return['body'] += '\n' + json.dumps(full_report, 54 | sort_keys=True, indent=4) 55 | else: 56 | for report in reports: 57 | try: 58 | full_report = urlquery.report(report['id'], include_details=True) 59 | to_return['body'] += '\n' + json.dumps(full_report, 60 | sort_keys=True, indent=4) 61 | except: 62 | print report 63 | return to_return 64 | 65 | def send_mail(content): 66 | msg = MIMEText(content['body']) 67 | msg['Subject'] = content['subject'] 68 | msg['From'] = sender 69 | msg['To'] = to 70 | s = smtplib.SMTP(smtp_server) 71 | s.sendmail(sender, [to], msg.as_string()) 72 | s.quit() 73 | 74 | if __name__ == '__main__': 75 | 76 | while True: 77 | print 'URL Feed and reports...' 78 | try: 79 | entries = get_country() 80 | ips = [] 81 | 82 | for e in entries: 83 | if e.get('ip').get('addr') in ips: 84 | continue 85 | ips.append(e.get('ip').get('addr')) 86 | mail_content = prepare_mail(e) 87 | send_mail(mail_content) 88 | print 'Done, waiting 3500s' 89 | time.sleep(3500) 90 | except Exception, e: 91 | print 'Something failed.' 92 | print e 93 | time.sleep(200) 94 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | from distutils.core import setup 4 | 5 | setup( 6 | name='urlquery', 7 | version='2.1', 8 | description='Python API to access urlquery v2', 9 | url='https://github.com/CIRCL/urlquery-py', 10 | author='Raphaël Vinot', 11 | author_email='raphael.vinot@circl.lu', 12 | maintainer='Raphaël Vinot', 13 | maintainer_email='raphael.vinot@circl.lu', 14 | packages=['urlquery'], 15 | classifiers=[ 16 | 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 17 | 'Development Status :: 5 - Production/Stable', 18 | 'Environment :: Console', 19 | 'Intended Audience :: Science/Research', 20 | 'Intended Audience :: Telecommunications Industry', 21 | 'Programming Language :: Python :: 2.7', 22 | 'Programming Language :: Python :: 3', 23 | 'Topic :: Security', 24 | 'Topic :: Internet', 25 | ], 26 | install_requires=['requests', 'python-dateutil'], 27 | ) 28 | -------------------------------------------------------------------------------- /urlquery/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import * 2 | -------------------------------------------------------------------------------- /urlquery/api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | try: 5 | import simplejson as json 6 | except: 7 | import json 8 | 9 | import requests 10 | from dateutil.parser import parse 11 | from datetime import datetime, timedelta 12 | import time 13 | try: 14 | from .api_key import key 15 | except: 16 | key = '' 17 | 18 | base_url = 'https://uqapi.net/v3/json' 19 | gzip_default = False 20 | 21 | __feed_type = ['unfiltered', 'flagged'] 22 | __intervals = ['hour', 'day'] 23 | __priorities = ['urlfeed', 'low', 'medium', 'high'] 24 | __search_types = ['string', 'regexp', 'ids_alert', 'urlquery_alert', 'js_script_hash'] 25 | __result_types = ['reports', 'url_list'] 26 | __url_matchings = ['url_host', 'url_path'] 27 | __access_levels = ['public', 'nonpublic', 'private'] 28 | 29 | 30 | def __set_default_values(gzip=False): 31 | to_return = {} 32 | to_return['key'] = key 33 | if gzip_default or gzip: 34 | to_return['gzip'] = True 35 | return to_return 36 | 37 | 38 | def __query(query, gzip=False): 39 | if query.get('error') is not None: 40 | return query 41 | query.update(__set_default_values(gzip)) 42 | r = requests.post(base_url, data=json.dumps(query)) 43 | return r.json() 44 | 45 | 46 | def urlfeed(feed='unfiltered', interval='hour', timestamp=None): 47 | """ 48 | The urlfeed function is used to access the main feed of URL from 49 | the service. Currently there are two distinct feed: 50 | 51 | 52 | :param feed: Currently there are two distinct feed: 53 | 54 | * *unfiltered*: contains all URL received by the service, as 55 | with other API calls some restrictions to the feed might 56 | apply depending. (default) 57 | * *flagged*: contains URLs flagged by some detection by 58 | urlquery, it will not contain data triggered by IDS 59 | alerts as that not possible to correlate correctly to a 60 | given URL. Access to this is currently restricted. 61 | 62 | :param interval: Sets the size of time window. 63 | * *hour*: splits the day into 24 slices which each 64 | goes from 00-59 of every hour, 65 | for example: 10:00-10:59 (default) 66 | * *day*: will return all URLs from a given date 67 | 68 | :param timestamp: This selects which slice to return. 69 | Any timestamp within a given interval/time 70 | slice can be used to return URLs from that 71 | timeframe. (default: now) 72 | 73 | 74 | :return: URLFEED 75 | 76 | { 77 | "start_time" : string, 78 | "end_time" : string, 79 | "feed" : [URLs] Array of URL objects (see README) 80 | } 81 | 82 | """ 83 | query = {'method': 'urlfeed'} 84 | if feed not in __feed_type: 85 | query.update({'error': 'Feed can only be in ' + ', '.join(__feed_type)}) 86 | if interval not in __intervals: 87 | query.update({'error': 'Interval can only be in ' + ', '.join(__intervals)}) 88 | if timestamp is None: 89 | ts = datetime.now() 90 | if interval == 'hour': 91 | ts = ts - timedelta(hours=1) 92 | if interval == 'day': 93 | ts = ts - timedelta(days=1) 94 | timestamp = time.mktime(ts.utctimetuple()) 95 | else: 96 | try: 97 | timestamp = time.mktime(parse(timestamp).utctimetuple()) 98 | except: 99 | query.update({'error': 'Unable to convert time to timestamp: ' + str(time)}) 100 | query['feed'] = feed 101 | query['interval'] = interval 102 | query['timestamp'] = timestamp 103 | return __query(query) 104 | 105 | 106 | def submit(url, useragent=None, referer=None, priority='low', 107 | access_level='public', callback_url=None, submit_vt=False, 108 | save_only_alerted=False): 109 | """ 110 | Submits an URL for analysis. 111 | 112 | :param url: URL to submit for analysis 113 | 114 | :param useragent: See user_agent_list API function. Setting an 115 | invalid UserAgent will result in a random UserAgent getting 116 | selected. 117 | 118 | :param referer: Referer to be applied to the first visiting URL 119 | 120 | :param priority: Set a priority on the submission. 121 | * *urlfeed*: URL might take several hour before completing. 122 | Used for big unfiltered feeds. Some filtering applies 123 | before accepting to queue so a submitted URL might not 124 | be tested. 125 | * *low*: For vetted or filtered feeds (default) 126 | * *medium*: Normal submissions 127 | * *high*: To ensure highest priority. 128 | 129 | :param access_level: Set accessibility of the report 130 | * *public*: URL is publicly available on the site (default) 131 | * *nonpublic*: Shared with other security organizations/researchers. 132 | * *private*: Only submitting key has access. 133 | 134 | :param callback_url: Results are POSTed back to the provided 135 | URL when processing has completed. The results will be 136 | originating from uqapi.net. Requires an API key. 137 | 138 | :param submit_vt: Submits any unknown file toVirusTotal for 139 | analysis. Information from VirusTotal will be included the 140 | report as soon as they have finished processing the sample. 141 | Most likely will the report from urlquery be available 142 | before the data is received back from VirusTotal. 143 | Default: false 144 | 145 | Only executables, zip archives and pdf documents are 146 | currently submitted. 147 | 148 | .. note:: Not fully implemented yet. 149 | 150 | :param save_only_alerted: Only reports which contains alerts 151 | (IDS, UQ alerts, Blacklists etc.) are kept. The main purpose 152 | for this flag is for mass testing URLs which has not been 153 | properly vetted so only URLs of interest are kept. 154 | Default: false 155 | 156 | Combining this with a callback URL will result in only those 157 | that has alerts on them beingPOSTed back to the callback URL. 158 | 159 | :return: QUEUE_STATUS 160 | 161 | { 162 | "status" : string, ("queued", "processing", "done") 163 | "queue_id" : string, 164 | "report_id" : string, Included once "status" = "done" 165 | "priority" : string, 166 | "url" : URL object, See README 167 | "settings" : SETTINGS object See README 168 | } 169 | 170 | 171 | """ 172 | query = {'method': 'submit'} 173 | if priority not in __priorities: 174 | query.update({'error': 'priority must be in ' + ', '.join(__priorities)}) 175 | if access_level not in __access_levels: 176 | query.update({'error': 'assess_level must be in ' + ', '.join(__access_levels)}) 177 | query['url'] = url 178 | if useragent is not None: 179 | query['useragent'] = useragent 180 | if referer is not None: 181 | query['referer'] = referer 182 | query['priority'] = priority 183 | query['access_level'] = access_level 184 | if callback_url is not None: 185 | query['callback_url'] = callback_url 186 | if submit_vt: 187 | query['submit_vt'] = True 188 | if save_only_alerted: 189 | query['save_only_alerted'] = True 190 | return __query(query) 191 | 192 | 193 | def user_agent_list(): 194 | """ 195 | Returns a list of accepted user agent strings. These might 196 | change over time, select one from the returned list. 197 | 198 | :return: A list of accepted user agents 199 | """ 200 | query = {'method': 'user_agent_list'} 201 | return __query(query) 202 | 203 | 204 | def mass_submit(urls, useragent=None, referer=None, 205 | access_level='public', priority='low', callback_url=None): 206 | """ 207 | See submit for details. All URLs will be queued with the same settings. 208 | 209 | :return: 210 | 211 | { 212 | [QUEUE_STATUS] Array of QUEUE_STATUS objects, See submit 213 | } 214 | """ 215 | query = {'method': 'mass_submit'} 216 | if access_level not in __access_levels: 217 | query.update({'error': 'assess_level must be in ' + ', '.join(__access_levels)}) 218 | if priority not in __priorities: 219 | query.update({'error': 'priority must be in ' + ', '.join(__priorities)}) 220 | if useragent is not None: 221 | query['useragent'] = useragent 222 | if referer is not None: 223 | query['referer'] = referer 224 | query['access_level'] = access_level 225 | query['priority'] = priority 226 | if callback_url is not None: 227 | query['callback_url'] = callback_url 228 | return __query(query) 229 | 230 | 231 | def queue_status(queue_id): 232 | """ 233 | Polls the current status of a queued URL. Normal processing time 234 | for a URL is about 1 minute. 235 | 236 | :param queue_id: QueueIDis returned by the submit API calls 237 | 238 | :return: QUEUE_STATUS (See submit) 239 | """ 240 | query = {'method': 'queue_status'} 241 | query['queue_id'] = queue_id 242 | return __query(query) 243 | 244 | 245 | def report(report_id, recent_limit=0, include_details=False, 246 | include_screenshot=False, include_domain_graph=False): 247 | """ 248 | This extracts data for a given report, the amount of data and 249 | what is included is dependent on the parameters set and the 250 | permissions of the API key. 251 | 252 | :param report_id: ID of the report. To get a valid report_id 253 | either use search to look for specificreports or report_list 254 | to get a list of recently finished reports. 255 | Can be string or an integer 256 | 257 | :param recent_limit: Number of recent reports to include. 258 | Only applies when include_details is true. 259 | Integer, default: 0 260 | 261 | :param include_details: Includes details in the report, like the 262 | alert information, Javascript and transaction data. 263 | Default: False 264 | 265 | :param include_screenshot: A screenshot is included in the report 266 | as a base64. The mime type of the image is also included. 267 | Default: False 268 | 269 | :param include_domain_graph: A domain graph is included in the 270 | report as a base64. The mime type of the image is also included. 271 | Default: False 272 | 273 | 274 | :return: BASICREPORT 275 | 276 | { 277 | "report_id": string, 278 | "date" : string, Date formatted string 279 | "url" : URL, URL object - See README 280 | "settings" : SETTINGS, SETTINGS object - See README 281 | "urlquery_alert_count" : int, Total UQ alerts 282 | "ids_alert_count" : int, Total IDS alert 283 | "blacklist_alert_count" : int, Total Blacklist alerts 284 | "screenshot" : BINBLOB, BINBLOB object - See README 285 | "domain_graph" : BINBLOB BINBLOB object - See README 286 | } 287 | """ 288 | query = {'method': 'report'} 289 | query['report_id'] = report_id 290 | if recent_limit is not None: 291 | query['recent_limit'] = recent_limit 292 | if include_details: 293 | query['include_details'] = True 294 | if include_screenshot: 295 | query['include_screenshot'] = True 296 | if include_domain_graph: 297 | query['include_domain_graph'] = True 298 | return __query(query) 299 | 300 | 301 | def report_list(timestamp=None, limit=50): 302 | """ 303 | Returns a list of reports created from the given timestamp, if it’s 304 | not included the most recent reports will be returned. 305 | 306 | Used to get a list of reports from given timestamp, along with basic 307 | information about the report like number of alerts and the 308 | submitted URL. 309 | 310 | To get reports which are nonpublic or private a API key is needed 311 | which has access to these. 312 | 313 | :param timestamp: Unix Epoch timestamp from the starting point to get 314 | reports. 315 | Default: If None, setted to datetime.now() 316 | 317 | :param limit: Number of reports in the list 318 | Default: 50 319 | 320 | :return: 321 | 322 | { 323 | "reports": [BASICREPORTS] List of BASICREPORTS - See report 324 | } 325 | 326 | """ 327 | query = {'method': 'report_list'} 328 | if timestamp is None: 329 | ts = datetime.now() 330 | timestamp = time.mktime(ts.utctimetuple()) 331 | else: 332 | try: 333 | timestamp = time.mktime(parse(timestamp).utctimetuple()) 334 | except: 335 | query.update({'error': 'Unable to convert time to timestamp: ' + str(time)}) 336 | query['timestamp'] = timestamp 337 | query['limit'] = limit 338 | return __query(query) 339 | 340 | 341 | def search(q, search_type='string', result_type='reports', 342 | url_matching='url_host', date_from=None, deep=False): 343 | """ 344 | Search in the database 345 | 346 | :param q: Search query 347 | 348 | :param search_type: Search type 349 | * *string*: Used to find URLs which contains a given string. 350 | To search for URLs on a specific IP use string. If a 351 | string is found to match an IP address it will automaticly 352 | search based on the IP. (default) 353 | * *regexp*: Search for a regexp pattern within URLs 354 | * *ids_alert*: Search for specific IDS alerts 355 | * *urlquery_alert*: ????? FIXME ????? 356 | * *js_script_hash*: Used to search for URLs/reports which 357 | contains a specific JavaScript. The scripts are searched 358 | based on SHA256, the hash value for each script are 359 | included in the report details. Can be used to find other 360 | 361 | :param result_type: Result type 362 | * *reports*: Full reports (default) 363 | * *url_list*: List of urls 364 | 365 | :param url_matching: What part of an URL to do pattern matching 366 | against. Only applies to string and regexp searches. 367 | * *url_host*: match against host (default) 368 | * *url_path*: match against path 369 | 370 | 371 | :param date_from: Unix epoch timestamp for starting searching point. 372 | Default: If None, setted to datetime.now() 373 | 374 | 375 | :param deep: Search all URLs, not just submitted URLs. 376 | Default: false 377 | Experimental! Should be used with care as it’s very resource 378 | intensive. 379 | """ 380 | query = {'method': 'search'} 381 | if search_type not in __search_types: 382 | query.update({'error': 'search_type can only be in ' + ', '.join(__search_types)}) 383 | if result_type not in __result_types: 384 | query.update({'error': 'result_type can only be in ' + ', '.join(__result_types)}) 385 | if url_matching not in __url_matchings: 386 | query.update({'error': 'url_matching can only be in ' + ', '.join(__url_matchings)}) 387 | 388 | if date_from is None: 389 | ts = datetime.now() 390 | timestamp = time.mktime(ts.utctimetuple()) 391 | else: 392 | try: 393 | timestamp = time.mktime(parse(date_from).utctimetuple()) 394 | except: 395 | query.update({'error': 'Unable to convert time to timestamp: ' + str(time)}) 396 | 397 | query['q'] = q 398 | query['search_type'] = search_type 399 | query['result_type'] = result_type 400 | query['url_matching'] = url_matching 401 | query['from'] = timestamp 402 | if deep: 403 | query['deep'] = True 404 | return __query(query) 405 | 406 | 407 | def reputation(q): 408 | """ 409 | Searches a reputation list of URLs detected over the last month. 410 | The search query can be a domain or an IP. 411 | 412 | With an API key, matching URLs will be returned along with the 413 | triggering alert. 414 | 415 | :param q: Search query 416 | """ 417 | 418 | query = {'method': 'reputation'} 419 | query['q'] = q 420 | return __query(query) 421 | --------------------------------------------------------------------------------