├── Education_OpenClassroom └── Readme.md ├── JupyterCon2017_NotebookNarratives.pptx ├── README.md ├── Retail_ImageClassificationStockkeeping ├── FindingGroceriesInImages.ipynb ├── Grocery │ └── testImages │ │ ├── WIN_20160803_11_28_42_Pro.bboxes.labels.tsv │ │ ├── WIN_20160803_11_28_42_Pro.bboxes.tsv │ │ ├── WIN_20160803_11_28_42_Pro.jpg │ │ ├── WIN_20160803_11_42_36_Pro.bboxes.labels.tsv │ │ ├── WIN_20160803_11_42_36_Pro.bboxes.tsv │ │ ├── WIN_20160803_11_42_36_Pro.jpg │ │ ├── WIN_20160803_11_46_03_Pro.bboxes.labels.tsv │ │ ├── WIN_20160803_11_46_03_Pro.bboxes.tsv │ │ ├── WIN_20160803_11_46_03_Pro.jpg │ │ ├── WIN_20160803_11_48_26_Pro.bboxes.labels.tsv │ │ ├── WIN_20160803_11_48_26_Pro.bboxes.tsv │ │ ├── WIN_20160803_11_48_26_Pro.jpg │ │ ├── WIN_20160803_12_37_07_Pro.bboxes.labels.tsv │ │ ├── WIN_20160803_12_37_07_Pro.bboxes.tsv │ │ └── WIN_20160803_12_37_07_Pro.jpg ├── README.md ├── cntk_helpers.py ├── environment.yml ├── fastRCNN │ ├── __init__.py │ ├── imdb.py │ ├── nms.py │ ├── pascal_voc.py │ ├── test.py │ ├── timer.py │ ├── train_svms.py │ ├── utils │ │ ├── cython_bbox.pyd │ │ ├── cython_bbox.so │ │ ├── cython_nms.pyd │ │ └── cython_nms.so │ └── voc_eval.py └── selectivesearch │ ├── README.md │ ├── __init__.py │ └── selectivesearch.py ├── Services_CustomSearchExpertSystems ├── JupyterNotebooks │ ├── 1_ServicesDemo_Build_a_Custom_Search_Engine.ipynb │ ├── SmartStoplist.txt │ ├── SmartStoplist_extended.txt │ ├── rake.py │ ├── sample_page.png │ ├── searchdesign.png │ └── toolsandprocess.png ├── Python │ ├── azsearch_mgmt.py │ ├── azsearch_query.py │ ├── azsearch_queryall.py │ └── keyphrase_extract.py ├── README.md └── sample │ ├── html │ ├── 1.1.1.1.1.1.html │ ├── 1.1.1.1.1.2.html │ ├── 1.1.1.1.1.3.html │ ├── 1.1.1.1.2.1.html │ ├── 1.1.1.1.4.1.1.html │ ├── 1.1.1.1.4.1.2.html │ ├── 1.1.1.1.4.1.3.html │ ├── 1.1.1.1.4.1.4.html │ ├── 1.1.1.1.4.1.5.html │ ├── 1.1.1.1.4.1.6.html │ ├── 1.1.1.1.4.1.7.html │ ├── 1.1.1.1.4.1.8.html │ ├── 1.1.1.11.2.1.2.html │ └── styles │ │ ├── css_Q4z0-iME7xTpui0Tzf4MEFv02rRuJ1dHZbo9kP_JLBg.css │ │ ├── css_XgGKW_fNRFCK5BruHWlbChY4U8WE0xT4CWGilKSjSXA.css │ │ ├── css_dolo-SIAwemLdrlTs99Lrug9kFXMYlMG3OlznBv4Kho.css │ │ ├── css_kShW4RPmRstZ3SpIC-ZvVGNFVAi0WEMuCnI0ZkYIaFw.css │ │ ├── css_rJ3pqftttKVzxtjsOG18hAid4RqqjfFMw3d1C89lWd4.css │ │ └── css_tuqeOBz1ozigHOvScJR2wasCmXBizZ9rfd58u6_20EE.css │ ├── parsed_content.xlsx │ ├── parsed_content_cornell_full.xlsx │ ├── parsed_content_sample.xlsx │ ├── raw_text_enriched_with_keywords_sample.xlsx │ └── sample_page.png └── Sports_IoTSensorSkillClassification ├── 1_Ski_Sports_IoT_Sensor_Data_Analysis.ipynb ├── AUC.png ├── rawdata.png ├── sensorpositions.png ├── ski_feature_set.csv ├── timeseriesraw.png ├── trunktwistdef.png └── trunktwistmax.png /Education_OpenClassroom/Readme.md: -------------------------------------------------------------------------------- 1 | # Demo 1 - Using Cognitive Services API within Jupyter Notebooks 2 | Microsoft Cognitive Services let you build apps with powerful algorithms to see, hear, speak, understand and interpret using natural methods of communication, with just a few lines of code. Easily add intelligent features, such as emotion and sentiment detection, vision and speech recognition, language understanding, knowledge and search. The following demos are to demonstrate how cognitive services can be introduced using Jupyter Notebooks. 
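To make the "few lines of code" point concrete, here is a minimal sketch of calling the Face API from a notebook cell with `requests`. It is illustrative only: the subscription key, region and image URL are placeholders you would replace with your own, and the attribute list should be checked against the current Face API documentation.

```python
# Minimal sketch: calling the Cognitive Services Face API from a notebook cell.
# The subscription key, region and image URL below are placeholders, not real values.
import requests

subscription_key = "YOUR_COGNITIVE_SERVICES_KEY"
endpoint = "https://westus.api.cognitive.microsoft.com/face/v1.0/detect"  # adjust region to your resource

headers = {"Ocp-Apim-Subscription-Key": subscription_key,
           "Content-Type": "application/json"}
params = {"returnFaceAttributes": "age,gender,emotion,headPose"}
body = {"url": "https://example.com/sample-face.jpg"}  # any publicly reachable image URL

# Each detected face comes back with a bounding rectangle and the requested attributes.
response = requests.post(endpoint, headers=headers, params=params, json=body)
response.raise_for_status()
for face in response.json():
    rect = face["faceRectangle"]
    attrs = face["faceAttributes"]
    print("Face at ({left}, {top}), {width}x{height}px".format(**rect))
    print("  estimated age: {}, gender: {}".format(attrs["age"], attrs["gender"]))
```

The Translation and Face Detection notebooks linked below follow the same pattern: a REST call, a JSON response, and a few lines of Python to interpret it.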
3 | 4 | see the following [Cognitive API Demos](http://aka.ms/CognitiveNotebooks) 5 | 6 | ## Demo [Cognitive+Services+Translation.ipynb](https://notebooks.azure.com/LeeStott-Microsoft/libraries/CogPy/html/Cognitive+Services+Translation.ipynb) 7 | 8 | Uses the Microsoft Cognitive Text Translator API to translate text from one language to another - this shows a practical use of Jupyter Python Notebooks in courses such as linguistics. 9 | 10 | ## Demo [FaceDetectionAPI.ipynb](https://notebooks.azure.com/LeeStott-Microsoft/libraries/CogPy/html/FaceDetectionAPi.ipynb) 11 | 12 | Uses the Microsoft Cognitive Face Detection API to identify the following attributes: 13 | https://azure.microsoft.com/en-gb/services/cognitive-services/face/ 14 | 15 | - Face Attributes 16 | - Age 17 | - Emotion 18 | - Gender 19 | - HeadPose 20 | - FaceRectangle 21 | 22 | Examples of usage include: 23 | - Face verification 24 | Check the likelihood that two faces belong to the same person. The API will return a confidence score of how likely it is that the two faces belong to one person. 25 | - Face detection 26 | Detect one or more human faces in an image and get back face rectangles for where in the image the faces are, along with face attributes that contain machine learning-based predictions of facial features. The face attribute features available are: Age, Emotion, Gender, Pose, Smile and Facial Hair, along with 27 landmarks for each face in the image. 27 | 28 | # Demo 2 - Using Jupyter Notebooks within the curricula at University of Cambridge Maths & Engineering 29 | 30 | Using Microsoft Azure Notebooks at the University of Cambridge by Dr Garth Wells 31 | 32 | ## Demo [3M1 - Mathematical Methods](https://notebooks.azure.com/garth-wells/libraries/CUED-3M1) 33 | 34 | These notebooks are in support of the linear algebra section of the course 3M1: Mathematical Methods at the Department of Engineering, University of Cambridge. 35 | Viewing and running 36 | Copies of these notebooks are posted to the Microsoft Azure cloud notebook service: 37 | https://notebooks.azure.com/library/CUED-3M1 38 | 39 | The notebooks can be viewed, modified and executed on the Azure service. 40 | 41 | Feedback and corrections 42 | Please report suggestions or errors at: 43 | https://github.com/garth-wells/notebooks-3M1/issues 44 | 45 | These notebooks are developed by Garth N. Wells (gnw20@cam.ac.uk). 46 | 47 | License and copyright 48 | All material is copyright of Garth N. Wells (gnw20@cam.ac.uk). 49 | 50 | All text is made available under the Creative Commons Attribution-ShareAlike 4.0 International Public License (https://creativecommons.org/licenses/by-sa/4.0/legalcode). 51 | All computer code is released under the MIT license. 52 | 53 | ## Demo [CUED-IA-Computing-Michaelmas](https://notebooks.azure.com/garth-wells/libraries/CUED-IA-Computing-Michaelmas) 54 | 55 | Part IA Computing (Michaelmas Term) 56 | This repository contains the Jupyter notebook activities for Part IA of the computing course (Michaelmas Term) in the Engineering Tripos at University of Cambridge. 57 | 58 | Viewing and running 59 | Copies of these notebooks are posted to the Microsoft Azure cloud service: 60 | https://notebooks.azure.com/library/CUED-IA-Computing-Michaelmas 61 | 62 | It is recommended that you use these notebooks via the above link.
63 | Feedback and corrections 64 | Please report suggestions or errors at: 65 | https://github.com/CambridgeEngineering/PartIA-Computing-Michaelmas/issues 66 | 67 | Author 68 | These notebooks are developed by Garth N. Wells (gnw20@cam.ac.uk). 69 | 70 | Acknowledgements 71 | Valuable feedback during the development of the notebooks was provided by Quang T. Ha, Hugo Hadfield, Tim Love, Chris Richardson and Joanna Stadnik. 72 | 73 | License and copyright 74 | All material is copyright of Garth N. Wells (gnw20@cam.ac.uk). 75 | 76 | All text is made available under the Creative Commons Attribution-ShareAlike 4.0 International Public License (https://creativecommons.org/licenses/by-sa/4.0/legalcode). 77 | 78 | All computer code is released under the MIT license. 79 | The MIT License (MIT) Copyright (c) 2016 Garth N. Wells 80 | 81 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 82 | 83 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 84 | 85 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
86 | 87 | # Some other examples of Notebooks in Education 88 | 89 | Rich Vuduc, Professor at the Georgia Institute of Technology in the School of Computational Science and Engineering 90 | [Computational Engineering at Georgia Institute](https://notebooks.azure.com/richie/) 91 | 92 | 93 | University of Cambridge [Engineering Module, University of Cambridge Engineering Part IB 94 | Information Engineering Elective Paper 8: Image Searching and Modelling Using Machine Learning](https://notebooks.azure.com/LeeStott-Microsoft/libraries/Cambridge_Engineering) -------------------------------------------------------------------------------- /JupyterCon2017_NotebookNarratives.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/JupyterCon2017_NotebookNarratives.pptx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # JupyterCon2017 3 | Notebook Narratives for Industry from 2017 JupyterCon 4 | 5 | 6 | > Sample notebooks for industry-specific machine learning implementations 7 | 8 | > Designed for you to re-use for your own solutions 9 | 10 | To run the Jupyter Notebooks directly from this GitHub repo, use [Binder](http://mybinder.org/), which turns a GitHub repo into a collection of interactive notebooks 11 | 12 | ## Contents 13 | ### Education - Open Classroom 14 | * Microsoft Cognitive APIs (http://aka.ms/cognitive) and Azure Jupyter Python Notebooks showcasing the Microsoft Cognitive APIs, which can be used across the curriculum. 15 | 16 | * As the pace of global innovation continues to accelerate, the University of Cambridge is evolving its engineering and mathematics curriculum to teach core concepts faster using higher-level, open-source tools in the public cloud. For example, Dr Garth Wells has increased learning in an introductory computing class by having students use Microsoft Azure Notebooks, which allows them to spend more time mastering concepts and enhancing problem-solving skills and less time on language syntax. This technology switch also gives students anytime, anywhere access to the tools needed to complete assignments, and it facilitates greater collaboration between professors, students, and the larger community. In addition, since Cambridge adopted a public cloud solution, IT infrastructure no longer limits the ingenuity of bright minds. 17 | 18 | Dr Garth Wells, University of Cambridge - Mathematics & Engineering Notebooks hosted on http://Notebooks.azure.com 19 | 20 | ### Services - Custom Search for an Expert System Chat 21 | * Description - Querying specific content areas quickly and easily is a common services sector need. Fast traversal of specialized publications, customer support knowledge bases or document repositories allows service companies to deliver their particular service efficiently and effectively. Simple FAQs don’t cover enough ground, and a string search isn’t effective or efficient for those not familiar with the domain or the document set. Instead, these companies can deliver a custom search experience that saves their clients time and provides them with better service through a question and answer format. In this project, we leveraged Azure Search and Cognitive Services, and we share our custom code for iterative testing, measurement, and indexer redeployment.
In our solution, the customized search engine will form the foundation for delivering a question and answer experience in a specific domain area. 22 | 23 | * The notebook here allows you to pre-process and enrich source text, upload a search index, and interactively query that search engine. 24 | 25 | * Read the complete code story behind this example on the Microsoft Developer Blog here: https://www.microsoft.com/developerblog/2017/08/07/developing-a-custom-search-engine-for-an-expert-system/ 26 | 27 | 28 | ### Retail - Image Classification for Automatic Stockkeeping 29 | * Description - Using a photo of an end-cap, classify whether the stocking is in compliance or out of compliance with the planogram. 30 | Leverages CNTK. Uses Fast R-CNN. 31 | 32 | 33 | ### Sports Training - Sensor Based Expertise Classification 34 | * Description - With wearable IoT sensors, we can collect positional and motion data that allow us to measure the expertise-level distinction between professionals and amateurs with high precision and accuracy. In our analysis, we discovered that the sensor data from just nine body positions provides ample signal to generate distinct activity signatures for the professional skiers when compared with the amateurs. In the notebook we describe how we engineer features over time segments that capture differences in motion and expertise relative to the expert. This allows amateurs to analyze their deficit areas and improve their skiing. 35 | 36 | 37 | ## More Links 38 | * https://aka.ms/NotebookNarratives 39 | * http://LearnAnalytics.microsoft.com/ 40 | * http://Microsoft.com/AI 41 | * http://Microsoft.com/DeveloperBlog 42 | 43 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_28_42_Pro.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | avocado 2 | orange 3 | ketchup 4 | onion 5 | eggBox 6 | joghurt 7 | gerkin 8 | pepper 9 | pepper 10 | champagne 11 | orangeJuice 12 | tomato 13 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_28_42_Pro.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 756 411 972 563 2 | 789 593 1000 766 3 | 578 409 749 764 4 | 301 403 455 553 5 | 96 611 524 764 6 | 695 889 893 1183 7 | 420 883 603 1152 8 | 236 991 419 1183 9 | 4 958 234 1206 10 | 778 1267 1044 1651 11 | 526 1271 708 1624 12 | 65 1354 474 1599 13 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_28_42_Pro.jpg -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_42_36_Pro.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | gerkin 2 | tabasco 3 | pepper 4 | avocado 5 | tomato 6 | pepper 7 | orangeJuice 8 | milk 9 | milk 10 | tomato 11 | --------------------------------------------------------------------------------
/Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_42_36_Pro.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 649 340 814 612 2 | 634 505 722 778 3 | 413 618 586 776 4 | 102 628 349 776 5 | 634 1025 904 1194 6 | 386 956 580 1150 7 | 131 954 338 1175 8 | 662 1261 841 1567 9 | 442 1250 626 1557 10 | 156 1425 300 1569 11 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_42_36_Pro.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_42_36_Pro.jpg -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_46_03_Pro.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | butter 2 | tomato 3 | tabasco 4 | tomato 5 | avocado 6 | gerkin 7 | water 8 | pepper 9 | eggBox 10 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_46_03_Pro.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 616 680 831 785 2 | 424 684 566 774 3 | 211 480 290 778 4 | 607 1014 920 1167 5 | 399 1006 549 1183 6 | 84 889 317 1215 7 | 697 1277 948 1592 8 | 336 1284 541 1476 9 | 8 1294 319 1605 10 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_46_03_Pro.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_46_03_Pro.jpg -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_48_26_Pro.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | butter 2 | ketchup 3 | milk 4 | orangeJuice 5 | gerkin 6 | joghurt 7 | eggBox 8 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_48_26_Pro.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 607 687 858 793 2 | 200 655 516 781 3 | 657 972 872 1198 4 | 286 972 536 1179 5 | 0 883 207 1194 6 | 415 1244 628 1582 7 | 6 1288 307 1626 8 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_48_26_Pro.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_11_48_26_Pro.jpg -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_12_37_07_Pro.bboxes.labels.tsv: 
-------------------------------------------------------------------------------- 1 | mustard 2 | butter 3 | orangeJuice 4 | joghurt 5 | ketchup 6 | orange 7 | champagne 8 | water 9 | eggBox 10 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_12_37_07_Pro.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 728 367 856 582 2 | 730 666 1020 781 3 | 246 561 618 783 4 | 803 899 1004 1217 5 | 509 1046 806 1229 6 | 234 977 467 1194 7 | 735 1313 972 1686 8 | 540 1323 733 1667 9 | 190 1294 509 1632 10 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_12_37_07_Pro.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Retail_ImageClassificationStockkeeping/Grocery/testImages/WIN_20160803_12_37_07_Pro.jpg -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/README.md: -------------------------------------------------------------------------------- 1 | # Finding Groceries Using Fast R-CNN in CNTK 2 | 3 | # Introduction 4 | 5 | We use Fast R-CNN to find rough locations and types of groceries in pictures. Please see the `FindingGroceriesInImages.ipynb` Jupyter Notebook for details. 6 | 7 | # Prerequisites 8 | 9 | The instructions below (apologies in advance!) are for Windows 10 64-bit users. I'll gladly accept pull requests for other operating systems, but haven't had a chance to run through all of them on every system. 10 | 11 | Please create a Python 3.4 environment with the appropriate setup by: 12 | 13 | - Install Anaconda 14 | - Create a new Anaconda environment using the included environment.yml file 15 | - `conda env create --name myNewEnv -f environment.yml python=3.4` 16 | - Install `scikit-image` and `opencv` using the pre-built Wheel files 17 | 18 | NOTE: I've put the direct link to CNTK 2.1's Python 3.4 Wheel in the `environment.yml` - if you're not on Windows 64-bit, you'll need to switch that out or remove it and [install CNTK by hand](https://docs.microsoft.com/en-us/cognitive-toolkit/Setup-CNTK-on-your-machine). 19 | 20 | ## Installing pre-built wheel files 21 | 22 | You can acquire pre-built Wheel files for Scikit-Image and OpenCV from http://www.lfd.uci.edu/~gohlke/pythonlibs/, download them (the `cp34` versions for Python 3.4, the `amd64` version since we're on 64-bit Windows), and install using `pip install \path\to\wheel-file`. 23 | 24 | ## C Library code 25 | 26 | The Fast R-CNN implementation for CNTK depends on custom C code from the original Fast R-CNN [GitHub repo](https://github.com/rbgirshick/fast-rcnn) which has been built for 64-bit Windows and Python 3.4. In theory, building this code for other versions of Python and other operating systems is possible, but I have yet to do so. Once again, if you find yourself doing so, please submit a pull request as I'd love to extend this beyond just Windows. 27 | 28 | # Appendix 29 | 30 | ## Why Fast R-CNN? 31 | 32 | As you know if you watch the Deep Learning space, Fast R-CNN is _far_ from state-of-the-art for the Object Detection problem. 
This was _not_ true when we were working with our partner - we chose Fast R-CNN because the CNTK team had a beta version of their current [Example](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Detection/FastRCNN) and at the time getting the pipeline in place was more important than implementing Faster R-CNN (the best at the time) from scratch. We also knew that even Faster R-CNN's edge would erode quickly in this space, so getting them up and running and able to experiment as new techniques came along was more important. -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - defaults 3 | dependencies: 4 | - mkl=2017.0.3=0 5 | - numpy=1.11.3=py34_0 6 | - pip=9.0.1=py34_1 7 | - python=3.4.5=0 8 | - scipy=0.19.1=np111py34_0 9 | - setuptools=27.2.0=py34_1 10 | - vs2010_runtime=10.00.40219.1=2 11 | - wheel=0.29.0=py34_0 12 | - pip: 13 | - backports-abc==0.5 14 | - bleach==2.0.0 15 | - https://cntk.ai/PythonWheel/GPU/cntk-2.1-cp34-cp34m-win_amd64.whl 16 | - colorama==0.3.9 17 | - cycler==0.10.0 18 | - decorator==4.1.2 19 | - easydict==1.7 20 | - entrypoints==0.2.3 21 | - future==0.16.0 22 | - html5lib==0.999999999 23 | - ipykernel==4.6.1 24 | - ipython==6.1.0 25 | - ipython-genutils==0.2.0 26 | - ipywidgets==6.0.0 27 | - jedi==0.10.2 28 | - jinja2==2.9.6 29 | - jsonschema==2.6.0 30 | - jupyter==1.0.0 31 | - jupyter-client==5.1.0 32 | - jupyter-console==5.1.0 33 | - jupyter-core==4.3.0 34 | - markupsafe==1.0 35 | - matplotlib==2.0.2 36 | - mistune==0.7.4 37 | - nbconvert==5.2.1 38 | - nbformat==4.3.0 39 | - networkx==1.11 40 | - notebook==5.0.0 41 | - olefile==0.44 42 | - pandocfilters==1.4.2 43 | - pickleshare==0.7.4 44 | - pillow==4.2.1 45 | - prompt-toolkit==1.0.15 46 | - pygments==2.2.0 47 | - pyparsing==2.2.0 48 | - python-dateutil==2.6.1 49 | - pytz==2017.2 50 | - pywavelets==0.5.2 51 | - pyzmq==16.0.2 52 | - qtconsole==4.3.1 53 | - scikit-learn==0.19.0 54 | - simplegeneric==0.8.1 55 | - six==1.10.0 56 | - testpath==0.3.1 57 | - tornado==4.5.1 58 | - traitlets==4.3.2 59 | - typing==3.6.2 60 | - wcwidth==0.1.7 61 | - webencodings==0.5.1 62 | - widgetsnbextension==2.0.0 63 | - win-unicode-console==0.5 64 | - xmltodict==0.11.0 65 | 66 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from .imdb import imdb 8 | from .pascal_voc import pascal_voc 9 | 10 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/imdb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import os.path as osp 10 | import PIL 11 | import numpy as np 12 | import scipy.sparse 13 | from 
builtins import range 14 | 15 | import sys 16 | from .utils.cython_bbox import bbox_overlaps 17 | 18 | class imdb(object): 19 | """Image database.""" 20 | 21 | def __init__(self, name): 22 | self._name = name 23 | self._num_classes = 0 24 | self._classes = [] 25 | self._image_index = [] 26 | self._obj_proposer = 'selective_search' 27 | self._roidb = None 28 | self._roidb_handler = self.default_roidb 29 | # Use this dict for storing dataset specific config options 30 | self.config = {} 31 | 32 | @property 33 | def name(self): 34 | return self._name 35 | 36 | @property 37 | def num_classes(self): 38 | return len(self._classes) 39 | 40 | @property 41 | def classes(self): 42 | return self._classes 43 | 44 | @property 45 | def image_index(self): 46 | return self._image_index 47 | 48 | @property 49 | def roidb_handler(self): 50 | return self._roidb_handler 51 | 52 | @roidb_handler.setter 53 | def roidb_handler(self, val): 54 | self._roidb_handler = val 55 | 56 | @property 57 | def roidb(self): 58 | # A roidb is a list of dictionaries, each with the following keys: 59 | # boxes 60 | # gt_overlaps 61 | # gt_classes 62 | # flipped 63 | if self._roidb is not None: 64 | return self._roidb 65 | self._roidb = self.roidb_handler() 66 | return self._roidb 67 | 68 | # @property 69 | # def cache_path(self): 70 | # cache_path = osp.abspath(osp.join(datasets.ROOT_DIR, 'data', 'cache')) 71 | # print cache_path 72 | # if not os.path.exists(cache_path): 73 | # os.makedirs(cache_path) 74 | # return cache_path 75 | 76 | @property 77 | def num_images(self): 78 | return len(self.image_index) 79 | 80 | def image_path_at(self, i): 81 | raise NotImplementedError 82 | 83 | def default_roidb(self): 84 | raise NotImplementedError 85 | 86 | def evaluate_detections(self, all_boxes, output_dir=None): 87 | """ 88 | all_boxes is a list of length number-of-classes. 89 | Each list element is a list of length number-of-images. 90 | Each of those list elements is either an empty list [] 91 | or a numpy array of detection. 
92 | 93 | all_boxes[class][image] = [] or np.array of shape #dets x 5 94 | """ 95 | raise NotImplementedError 96 | 97 | def append_flipped_images(self): 98 | num_images = self.num_images 99 | widths = [PIL.Image.open(self.image_path_at(i)).size[0] 100 | for i in range(num_images)] 101 | for i in range(num_images): 102 | boxes = self.roidb[i]['boxes'].copy() 103 | oldx1 = boxes[:, 0].copy() 104 | oldx2 = boxes[:, 2].copy() 105 | boxes[:, 0] = widths[i] - oldx2 - 1 106 | boxes[:, 2] = widths[i] - oldx1 - 1 107 | assert (boxes[:, 2] >= boxes[:, 0]).all() 108 | entry = {'boxes' : boxes, 109 | 'gt_overlaps' : self.roidb[i]['gt_overlaps'], 110 | 'gt_classes' : self.roidb[i]['gt_classes'], 111 | 'flipped' : True} 112 | self.roidb.append(entry) 113 | self._image_index = self._image_index * 2 114 | 115 | def evaluate_recall(self, candidate_boxes, ar_thresh=0.5): 116 | # Record max overlap value for each gt box 117 | # Return vector of overlap values 118 | gt_overlaps = np.zeros(0) 119 | for i in range(self.num_images): 120 | gt_inds = np.where(self.roidb[i]['gt_classes'] > 0)[0] 121 | gt_boxes = self.roidb[i]['boxes'][gt_inds, :] 122 | 123 | boxes = candidate_boxes[i] 124 | if boxes.shape[0] == 0: 125 | continue 126 | overlaps = bbox_overlaps(boxes.astype(np.float), 127 | gt_boxes.astype(np.float)) 128 | 129 | # gt_overlaps = np.hstack((gt_overlaps, overlaps.max(axis=0))) 130 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 131 | for j in range(gt_boxes.shape[0]): 132 | argmax_overlaps = overlaps.argmax(axis=0) 133 | max_overlaps = overlaps.max(axis=0) 134 | gt_ind = max_overlaps.argmax() 135 | gt_ovr = max_overlaps.max() 136 | assert(gt_ovr >= 0) 137 | box_ind = argmax_overlaps[gt_ind] 138 | _gt_overlaps[j] = overlaps[box_ind, gt_ind] 139 | assert(_gt_overlaps[j] == gt_ovr) 140 | overlaps[box_ind, :] = -1 141 | overlaps[:, gt_ind] = -1 142 | 143 | gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 144 | 145 | num_pos = gt_overlaps.size 146 | gt_overlaps = np.sort(gt_overlaps) 147 | step = 0.001 148 | thresholds = np.minimum(np.arange(0.5, 1.0 + step, step), 1.0) 149 | recalls = np.zeros_like(thresholds) 150 | for i, t in enumerate(thresholds): 151 | recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) 152 | ar = 2 * np.trapz(recalls, thresholds) 153 | 154 | return ar, gt_overlaps, recalls, thresholds 155 | 156 | def create_roidb_from_box_list(self, box_list, gt_roidb): 157 | assert len(box_list) == self.num_images, \ 158 | 'Number of boxes must match number of ground-truth images' 159 | roidb = [] 160 | for i in range(self.num_images): 161 | boxes = box_list[i] 162 | num_boxes = boxes.shape[0] 163 | overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32) 164 | 165 | if gt_roidb and gt_roidb[i]: 166 | gt_boxes = gt_roidb[i]['boxes'] 167 | gt_classes = gt_roidb[i]['gt_classes'] 168 | if len(gt_classes) > 0: #for pascal every image has at least one annotated object. 
This is not the case however if including negative images 169 | gt_overlaps = bbox_overlaps(boxes.astype(np.float), 170 | gt_boxes.astype(np.float)) 171 | 172 | argmaxes = gt_overlaps.argmax(axis=1) 173 | maxes = gt_overlaps.max(axis=1) 174 | I = np.where(maxes > 0)[0] 175 | overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] 176 | 177 | overlaps = scipy.sparse.csr_matrix(overlaps) 178 | roidb.append({'boxes' : boxes, 179 | 'gt_classes' : np.zeros((num_boxes,), 180 | dtype=np.int32), 181 | 'gt_overlaps' : overlaps, 182 | 'flipped' : False}) 183 | return roidb 184 | 185 | @staticmethod 186 | def merge_roidbs(a, b): 187 | assert len(a) == len(b) 188 | for i in range(len(a)): 189 | if a[i]: #if image has at least one annotated object 190 | a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) 191 | a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], 192 | b[i]['gt_classes'])) 193 | a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], 194 | b[i]['gt_overlaps']]) 195 | else: 196 | a[i] = b[i] 197 | return a 198 | 199 | def competition_mode(self, on): 200 | """Turn competition mode on or off.""" 201 | pass 202 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | yy1 = np.maximum(y1[i], y1[order[1:]]) 26 | xx2 = np.minimum(x2[i], x2[order[1:]]) 27 | yy2 = np.minimum(y2[i], y2[order[1:]]) 28 | 29 | w = np.maximum(0.0, xx2 - xx1 + 1) 30 | h = np.maximum(0.0, yy2 - yy1 + 1) 31 | inter = w * h 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 33 | 34 | inds = np.where(ovr <= thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from __future__ import print_function 9 | import os, pdb 10 | import xml.dom.minidom as minidom 11 | import numpy as np 12 | import scipy.sparse 13 | import scipy.io as sio 14 | import pickle as cp 15 | import subprocess 16 | from .imdb import imdb 17 | from .voc_eval import voc_eval 18 | #from fastRCNN.imdb import imdb 19 | #from fastRCNN.voc_eval import voc_eval 20 | 21 | class pascal_voc(imdb): 22 | def __init__(self, image_set, year, classes, maxNrRois, cacheDir, devkit_path=None): 23 | imdb.__init__(self, 'voc_' + year + '_' + image_set) 24 | self._year = year 25 | self._image_set = image_set 26 | self._maxNrRois = maxNrRois 27 | 
self._ROOT_DIR = os.path.join(os.path.dirname(__file__), '..') 28 | self._cacheDir = cacheDir 29 | self._devkit_path = self._get_default_path() if devkit_path is None \ 30 | else os.path.join(devkit_path, 'VOCdevkit') 31 | self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year) 32 | self._classes = classes 33 | #('__background__', # always index 0 34 | # 'aeroplane', 'bicycle', 'bird', 'boat', 35 | # 'bottle', 'bus', 'car', 'cat', 'chair', 36 | # 'cow', 'diningtable', 'dog', 'horse', 37 | # 'motorbike', 'person', 'pottedplant', 38 | # 'sheep', 'sofa', 'train', 'tvmonitor') 39 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) 40 | self._image_ext = '.jpg' 41 | self._image_index = self._load_image_set_index() 42 | # Default to roidb handler 43 | self._roidb_handler = self.selective_search_roidb 44 | 45 | # PASCAL specific config options 46 | self.config = {'cleanup' : True, 47 | 'use_salt' : True, 48 | 'top_k' : 2000} 49 | 50 | assert os.path.exists(self._devkit_path), \ 51 | 'VOCdevkit path does not exist: {}'.format(self._devkit_path) 52 | assert os.path.exists(self._data_path), \ 53 | 'Path does not exist: {}'.format(self._data_path) 54 | 55 | @property 56 | def cache_path(self): 57 | cache_path = self._cacheDir 58 | #cache_path = osp.abspath(osp.join(datasets.ROOT_DIR, 'data', 'cache')) 59 | if not os.path.exists(cache_path): 60 | os.makedirs(cache_path) 61 | return cache_path 62 | 63 | def image_path_at(self, i): 64 | """ 65 | Return the absolute path to image i in the image sequence. 66 | """ 67 | return self.image_path_from_index(self._image_index[i]) 68 | 69 | def image_path_from_index(self, index): 70 | """ 71 | Construct an image path from the image's "index" identifier. 72 | """ 73 | image_path = os.path.join(self._data_path, 'JPEGImages', 74 | index + self._image_ext) 75 | assert os.path.exists(image_path), \ 76 | 'Path does not exist: {}'.format(image_path) 77 | return image_path 78 | 79 | def _load_image_set_index(self): 80 | """ 81 | Load the indexes listed in this dataset's image set file. 82 | """ 83 | # Example path to image set file: 84 | # self._devkit_path + /VOCdevkit/VOC2007/ImageSets/Main/val.txt 85 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main', 86 | self._image_set + '.txt') 87 | assert os.path.exists(image_set_file), \ 88 | 'Path does not exist: {}'.format(image_set_file) 89 | with open(image_set_file) as f: 90 | image_index = [x.strip() for x in f.readlines()] 91 | return image_index 92 | 93 | def _get_default_path(self): 94 | """ 95 | Return the default path where PASCAL VOC is expected to be installed. 96 | """ 97 | return os.path.join(self._ROOT_DIR, 'data', 'pascalVoc', 'VOCdevkit') 98 | 99 | def gt_roidb(self): 100 | """ 101 | Return the database of ground-truth regions of interest. 102 | 103 | This function loads/saves from/to a cache file to speed up future calls. 
104 | """ 105 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 106 | if os.path.exists(cache_file): 107 | with open(cache_file, 'rb') as fid: 108 | roidb = cp.load(fid) 109 | print ('{} gt roidb loaded from {}'.format(self.name, cache_file)) 110 | return roidb 111 | 112 | gt_roidb = [self._load_pascal_annotation(index) 113 | for index in self.image_index] 114 | with open(cache_file, 'wb') as fid: 115 | cp.dump(gt_roidb, fid, cp.HIGHEST_PROTOCOL) 116 | print ('wrote gt roidb to {}'.format(cache_file)) 117 | 118 | return gt_roidb 119 | 120 | def selective_search_roidb(self): 121 | """ 122 | Return the database of selective search regions of interest. 123 | Ground-truth ROIs are also included. 124 | 125 | This function loads/saves from/to a cache file to speed up future calls. 126 | """ 127 | cache_file = os.path.join(self.cache_path, 128 | self.name + '_selective_search_roidb.pkl') 129 | 130 | if os.path.exists(cache_file): 131 | with open(cache_file, 'rb') as fid: 132 | roidb = cp.load(fid, encoding='latin1') 133 | print ('{} ss roidb loaded from {}'.format(self.name, cache_file)) 134 | return roidb 135 | 136 | if int(self._year) == 2007 or not self._image_set.startswith('test'): 137 | gt_roidb = self.gt_roidb() 138 | ss_roidb = self._load_selective_search_roidb(gt_roidb) 139 | roidb = imdb.merge_roidbs(gt_roidb, ss_roidb) 140 | else: 141 | roidb = self._load_selective_search_roidb(None) 142 | 143 | # Keep max of e.g. 2000 rois 144 | if type(self._maxNrRois) == int: 145 | print ("Only keep the first %d ROIs..." % self._maxNrRois) 146 | for i in range(self.num_images): 147 | gt_overlaps = roidb[i]['gt_overlaps'] 148 | gt_overlaps = gt_overlaps.todense()[:self._maxNrRois] 149 | gt_overlaps = scipy.sparse.csr_matrix(gt_overlaps) 150 | roidb[i]['boxes'] = roidb[i]['boxes'][:self._maxNrRois, :] 151 | roidb[i]['gt_classes'] = roidb[i]['gt_classes'][:self._maxNrRois] 152 | roidb[i]['gt_overlaps'] = roidb[i]['gt_overlaps'] = gt_overlaps 153 | 154 | with open(cache_file, 'wb') as fid: 155 | cp.dump(roidb, fid, cp.HIGHEST_PROTOCOL) 156 | print ('wrote ss roidb to {}'.format(cache_file)) 157 | 158 | return roidb 159 | 160 | def _load_selective_search_roidb(self, gt_roidb): 161 | filename = os.path.abspath(os.path.join(self._devkit_path, '..', 162 | 'selective_search_data', 163 | self.name + '.mat')) 164 | assert os.path.exists(filename), \ 165 | 'Selective search data not found at: {}'.format(filename) 166 | raw_data = sio.loadmat(filename)['boxes'].ravel() 167 | 168 | box_list = [] 169 | for i in range(raw_data.shape[0]): 170 | box_list.append(raw_data[i][:, (1, 0, 3, 2)] - 1) 171 | 172 | return self.create_roidb_from_box_list(box_list, gt_roidb) 173 | 174 | def selective_search_IJCV_roidb(self): 175 | """ 176 | Return the database of selective search regions of interest. 177 | Ground-truth ROIs are also included. 178 | 179 | This function loads/saves from/to a cache file to speed up future calls. 180 | """ 181 | cache_file = os.path.join(self.cache_path, 182 | '{:s}_selective_search_IJCV_top_{:d}_roidb.pkl'. 
183 | format(self.name, self.config['top_k'])) 184 | 185 | if os.path.exists(cache_file): 186 | with open(cache_file, 'rb') as fid: 187 | roidb = cp.load(fid) 188 | print ('{} ss roidb loaded from {}'.format(self.name, cache_file)) 189 | return roidb 190 | 191 | gt_roidb = self.gt_roidb() 192 | ss_roidb = self._load_selective_search_IJCV_roidb(gt_roidb) 193 | roidb = imdb.merge_roidbs(gt_roidb, ss_roidb) 194 | with open(cache_file, 'wb') as fid: 195 | cp.dump(roidb, fid, cp.HIGHEST_PROTOCOL) 196 | print ('wrote ss roidb to {}'.format(cache_file)) 197 | 198 | return roidb 199 | 200 | def _load_selective_search_IJCV_roidb(self, gt_roidb): 201 | IJCV_path = os.path.abspath(os.path.join(self.cache_path, '..', 202 | 'selective_search_IJCV_data', 203 | 'voc_' + self._year)) 204 | assert os.path.exists(IJCV_path), \ 205 | 'Selective search IJCV data not found at: {}'.format(IJCV_path) 206 | 207 | top_k = self.config['top_k'] 208 | box_list = [] 209 | for i in range(self.num_images): 210 | filename = os.path.join(IJCV_path, self.image_index[i] + '.mat') 211 | raw_data = sio.loadmat(filename) 212 | box_list.append((raw_data['boxes'][:top_k, :]-1).astype(np.uint16)) 213 | 214 | return self.create_roidb_from_box_list(box_list, gt_roidb) 215 | 216 | def _load_pascal_annotation(self, index): 217 | """ 218 | Load image and bounding boxes info from XML file in the PASCAL VOC 219 | format. 220 | """ 221 | filename = os.path.join(self._data_path, 'Annotations', index + '.xml') 222 | # print ('Loading: {}'.format(filename)) 223 | def get_data_from_tag(node, tag): 224 | return node.getElementsByTagName(tag)[0].childNodes[0].data 225 | 226 | with open(filename) as f: 227 | data = minidom.parseString(f.read()) 228 | 229 | objs = data.getElementsByTagName('object') 230 | num_objs = len(objs) 231 | 232 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 233 | gt_classes = np.zeros((num_objs), dtype=np.int32) 234 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) 235 | 236 | # Load object bounding boxes into a data frame. 237 | for ix, obj in enumerate(objs): 238 | # Make pixel indexes 0-based 239 | x1 = float(get_data_from_tag(obj, 'xmin')) - 1 240 | y1 = float(get_data_from_tag(obj, 'ymin')) - 1 241 | x2 = float(get_data_from_tag(obj, 'xmax')) - 1 242 | y2 = float(get_data_from_tag(obj, 'ymax')) - 1 243 | cls = self._class_to_ind[ 244 | str(get_data_from_tag(obj, "name")).lower().strip()] 245 | boxes[ix, :] = [x1, y1, x2, y2] 246 | gt_classes[ix] = cls 247 | overlaps[ix, cls] = 1.0 248 | 249 | overlaps = scipy.sparse.csr_matrix(overlaps) 250 | 251 | return {'boxes' : boxes, 252 | 'gt_classes': gt_classes, 253 | 'gt_overlaps' : overlaps, 254 | 'flipped' : False} 255 | 256 | def _write_voc_results_file(self, all_boxes, output_dir): 257 | comp_id = 'comp4' 258 | if self.config['use_salt']: 259 | comp_id += '-{}'.format(os.getpid()) 260 | 261 | for cls_ind, cls in enumerate(self.classes): 262 | if cls == '__background__': 263 | continue 264 | print ('Writing {} VOC results file'.format(cls)) 265 | filename = self._get_voc_results_file_template(output_dir).format(cls) 266 | with open(filename, 'wt') as f: 267 | for im_ind, index in enumerate(self.image_index): 268 | dets = all_boxes[cls_ind][im_ind] 269 | if dets == []: 270 | continue 271 | # the VOCdevkit expects 1-based indices 272 | for k in range(dets.shape[0]): 273 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 
274 | format(index, dets[k, -1], 275 | dets[k, 0] + 1, dets[k, 1] + 1, 276 | dets[k, 2] + 1, dets[k, 3] + 1)) 277 | return comp_id 278 | 279 | def evaluate_detections(self, all_boxes, output_dir, boUsePythonImpl = True, use_07_metric = False): 280 | self._write_voc_results_file(all_boxes, output_dir) 281 | if not boUsePythonImpl: 282 | self._do_matlab_eval(comp_id, output_dir) 283 | else: 284 | self._do_python_eval(output_dir, use_07_metric) 285 | 286 | def _do_matlab_eval(self, comp_id, output_dir='output'): 287 | rm_results = self.config['cleanup'] 288 | 289 | path = os.path.join(os.path.dirname(__file__), 290 | 'VOCdevkit-matlab-wrapper') 291 | cmd = 'cd {} && '.format(path) 292 | cmd += '{:s} -nodisplay -nodesktop '.format(datasets.MATLAB) 293 | cmd += '-r "dbstop if error; ' 294 | cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\',{:d}); quit;"' \ 295 | .format(self._devkit_path, comp_id, 296 | self._image_set, output_dir, int(rm_results)) 297 | print('Running:\n{}'.format(cmd)) 298 | status = subprocess.call(cmd, shell=True) 299 | 300 | def competition_mode(self, on): 301 | if on: 302 | self.config['use_salt'] = False 303 | self.config['cleanup'] = False 304 | else: 305 | self.config['use_salt'] = True 306 | self.config['cleanup'] = True 307 | 308 | ######################################################################### 309 | # Python evaluation functions (copied from faster-RCNN) 310 | ########################################################################## 311 | def _get_voc_results_file_template(self, evalDir): 312 | if not os.path.exists(evalDir): 313 | os.makedirs(evalDir) 314 | filename = self._image_set + '_{:s}.txt' 315 | return os.path.join(evalDir, filename) 316 | 317 | def _do_python_eval(self, output_dir='output', use_07_metric=None): 318 | annopath = os.path.join(self._devkit_path, 'VOC' + self._year, 'Annotations', '{}.xml') 319 | imagesetfile = os.path.join( 320 | self._devkit_path, 321 | 'VOC' + self._year, 322 | 'ImageSets', 323 | 'Main', 324 | self._image_set + '.txt') 325 | aps = [] 326 | # The PASCAL VOC metric changed in 2010 327 | if use_07_metric == None: 328 | use_07_metric = True if int(self._year) < 2010 else False 329 | 330 | print ('VOC07 metric? 
' + ('Yes' if use_07_metric else 'No')) 331 | if not os.path.isdir(output_dir): 332 | os.mkdir(output_dir) 333 | for i, cls in enumerate(self._classes): 334 | if cls == '__background__': 335 | continue 336 | filename = self._get_voc_results_file_template(output_dir).format(cls) 337 | 338 | rec, prec, ap = voc_eval( 339 | filename, annopath, imagesetfile, cls, cachedir = output_dir, ovthresh=0.5, 340 | use_07_metric=use_07_metric) 341 | aps += [ap] 342 | print('AP for {} = {:.4f}'.format(cls, ap)) 343 | with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f: 344 | cp.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) 345 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 346 | # print('~~~~~~~~') 347 | # print('Results:') 348 | # for ap in aps: 349 | # print('{:.3f}'.format(ap)) 350 | # print('{:.3f}'.format(np.mean(aps))) 351 | # print('~~~~~~~~') 352 | # print('') 353 | print('--------------------------------------------------------------') 354 | print('Results computed with the **unofficial** Python eval code.') 355 | print('Results should be very close to the official MATLAB eval code.') 356 | print('Recompute with `./tools/reval.py --matlab ...` for your paper.') 357 | print('-- Thanks, The Management') 358 | print('--------------------------------------------------------------') 359 | 360 | if __name__ == '__main__': 361 | d = datasets.pascal_voc('trainval', '2007') 362 | res = d.roidb 363 | from IPython import embed; embed() 364 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Test a Fast R-CNN network on an imdb (image database).""" 9 | 10 | from __future__ import print_function 11 | import os, sys, cv2, numpy as np, pickle as cp, heapq 12 | from .nms import nms as nmsPython 13 | from .utils.cython_nms import nms 14 | from .timer import Timer 15 | from cntk_helpers import im_detect, apply_nms 16 | from builtins import range 17 | 18 | 19 | def _get_image_blob(im): 20 | """Converts an image into a network input. 
21 | 22 | Arguments: 23 | im (ndarray): a color image in BGR order 24 | 25 | Returns: 26 | blob (ndarray): a data blob holding an image pyramid 27 | im_scale_factors (list): list of image scales (relative to im) used 28 | in the image pyramid 29 | """ 30 | im_orig = im.astype(np.float32, copy=True) 31 | im_orig -= cfg.PIXEL_MEANS 32 | 33 | im_shape = im_orig.shape 34 | im_size_min = np.min(im_shape[0:2]) 35 | im_size_max = np.max(im_shape[0:2]) 36 | 37 | processed_ims = [] 38 | im_scale_factors = [] 39 | 40 | for target_size in cfg.TEST.SCALES: 41 | im_scale = float(target_size) / float(im_size_min) 42 | # Prevent the biggest axis from being more than MAX_SIZE 43 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 44 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 45 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 46 | interpolation=cv2.INTER_LINEAR) 47 | im_scale_factors.append(im_scale) 48 | processed_ims.append(im) 49 | 50 | # Create a blob to hold the input images 51 | blob = im_list_to_blob(processed_ims) 52 | 53 | return blob, np.array(im_scale_factors) 54 | 55 | def _get_rois_blob(im_rois, im_scale_factors): 56 | """Converts RoIs into network inputs. 57 | 58 | Arguments: 59 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 60 | im_scale_factors (list): scale factors as returned by _get_image_blob 61 | 62 | Returns: 63 | blob (ndarray): R x 5 matrix of RoIs in the image pyramid 64 | """ 65 | rois, levels = _project_im_rois(im_rois, im_scale_factors) 66 | rois_blob = np.hstack((levels, rois)) 67 | return rois_blob.astype(np.float32, copy=False) 68 | 69 | def _project_im_rois(im_rois, scales): 70 | """Project image RoIs into the image pyramid built by _get_image_blob. 71 | 72 | Arguments: 73 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 74 | scales (list): scale factors as returned by _get_image_blob 75 | 76 | Returns: 77 | rois (ndarray): R x 4 matrix of projected RoI coordinates 78 | levels (list): image pyramid levels used by each projected RoI 79 | """ 80 | im_rois = im_rois.astype(np.float, copy=False) 81 | 82 | if len(scales) > 1: 83 | widths = im_rois[:, 2] - im_rois[:, 0] + 1 84 | heights = im_rois[:, 3] - im_rois[:, 1] + 1 85 | 86 | areas = widths * heights 87 | scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) 88 | diff_areas = np.abs(scaled_areas - 224 * 224) 89 | levels = diff_areas.argmin(axis=1)[:, np.newaxis] 90 | else: 91 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) 92 | 93 | rois = im_rois * scales[levels] 94 | 95 | return rois, levels 96 | 97 | def _get_blobs(im, rois): 98 | """Convert an image and RoIs within that image into network inputs.""" 99 | blobs = {'data' : None, 'rois' : None} 100 | blobs['data'], im_scale_factors = _get_image_blob(im) 101 | blobs['rois'] = _get_rois_blob(rois, im_scale_factors) 102 | return blobs, im_scale_factors 103 | 104 | def _bbox_pred(boxes, box_deltas): 105 | """Transform the set of class-agnostic boxes into class-specific boxes 106 | by applying the predicted offsets (box_deltas) 107 | """ 108 | if boxes.shape[0] == 0: 109 | return np.zeros((0, box_deltas.shape[1])) 110 | 111 | boxes = boxes.astype(np.float, copy=False) 112 | widths = boxes[:, 2] - boxes[:, 0] + cfg.EPS 113 | heights = boxes[:, 3] - boxes[:, 1] + cfg.EPS 114 | ctr_x = boxes[:, 0] + 0.5 * widths 115 | ctr_y = boxes[:, 1] + 0.5 * heights 116 | 117 | dx = box_deltas[:, 0::4] 118 | dy = box_deltas[:, 1::4] 119 | dw = box_deltas[:, 2::4] 120 | dh = 
box_deltas[:, 3::4] 121 | 122 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 123 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 124 | pred_w = np.exp(dw) * widths[:, np.newaxis] 125 | pred_h = np.exp(dh) * heights[:, np.newaxis] 126 | 127 | pred_boxes = np.zeros(box_deltas.shape) 128 | # x1 129 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 130 | # y1 131 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 132 | # x2 133 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 134 | # y2 135 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 136 | 137 | return pred_boxes 138 | 139 | def _clip_boxes(boxes, im_shape): 140 | """Clip boxes to image boundaries.""" 141 | # x1 >= 0 142 | boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0) 143 | # y1 >= 0 144 | boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0) 145 | # x2 < im_shape[1] 146 | boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1) 147 | # y2 < im_shape[0] 148 | boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1) 149 | return boxes 150 | 151 | # def im_detect(net, im, boxes): 152 | # """Detect object classes in an image given object proposals. 153 | # 154 | # Arguments: 155 | # net (caffe.Net): Fast R-CNN network to use 156 | # im (ndarray): color image to test (in BGR order) 157 | # boxes (ndarray): R x 4 array of object proposals 158 | # 159 | # Returns: 160 | # scores (ndarray): R x K array of object class scores (K includes 161 | # background as object category 0) 162 | # boxes (ndarray): R x (4*K) array of predicted bounding boxes 163 | # """ 164 | # blobs, unused_im_scale_factors = _get_blobs(im, boxes) 165 | # 166 | # # When mapping from image ROIs to feature map ROIs, there's some aliasing 167 | # # (some distinct image ROIs get mapped to the same feature ROI). 168 | # # Here, we identify duplicate feature ROIs, so we only compute features 169 | # # on the unique subset. 
170 | # if cfg.DEDUP_BOXES > 0: 171 | # v = np.array([1, 1e3, 1e6, 1e9, 1e12]) 172 | # hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) 173 | # _, index, inv_index = np.unique(hashes, return_index=True, 174 | # return_inverse=True) 175 | # blobs['rois'] = blobs['rois'][index, :] 176 | # boxes = boxes[index, :] 177 | # 178 | # # reshape network inputs 179 | # net.blobs['data'].reshape(*(blobs['data'].shape)) 180 | # net.blobs['rois'].reshape(*(blobs['rois'].shape)) 181 | # blobs_out = net.forward(data=blobs['data'].astype(np.float32, copy=False), 182 | # rois=blobs['rois'].astype(np.float32, copy=False)) 183 | # if cfg.TEST.SVM: 184 | # # use the raw scores before softmax under the assumption they 185 | # # were trained as linear SVMs 186 | # scores = net.blobs['cls_score'].data 187 | # else: 188 | # # use softmax estimated probabilities 189 | # scores = blobs_out['cls_prob'] 190 | # 191 | # if cfg.TEST.BBOX_REG: 192 | # # Apply bounding-box regression deltas 193 | # box_deltas = blobs_out['bbox_pred'] 194 | # pred_boxes = _bbox_pred(boxes, box_deltas) 195 | # pred_boxes = _clip_boxes(pred_boxes, im.shape) 196 | # else: 197 | # # Simply repeat the boxes, once for each class 198 | # pred_boxes = np.tile(boxes, (1, scores.shape[1])) 199 | # 200 | # if cfg.DEDUP_BOXES > 0: 201 | # # Map scores and predictions back to the original set of boxes 202 | # scores = scores[inv_index, :] 203 | # pred_boxes = pred_boxes[inv_index, :] 204 | # 205 | # return scores, pred_boxes 206 | 207 | def vis_detections(im, class_name, dets, thresh=0.3): 208 | """Visual debugging of detections.""" 209 | import matplotlib.pyplot as plt 210 | im = im[:, :, (2, 1, 0)] 211 | for i in range(np.minimum(10, dets.shape[0])): 212 | bbox = dets[i, :4] 213 | score = dets[i, -1] 214 | if score > thresh: 215 | plt.cla() 216 | plt.imshow(im) 217 | plt.gca().add_patch( 218 | plt.Rectangle((bbox[0], bbox[1]), 219 | bbox[2] - bbox[0], 220 | bbox[3] - bbox[1], fill=False, 221 | edgecolor='g', linewidth=3) 222 | ) 223 | plt.title('{} {:.3f}'.format(class_name, score)) 224 | plt.show() 225 | 226 | def test_net(net, imdb, output_dir, feature_scale, classifier = 'svm', nmsThreshold = 0.3, 227 | boUsePythonImpl = False, boThresholdDetections = True, boApplyNms = True): 228 | """Test a Fast R-CNN network on an image database.""" 229 | num_images = len(imdb.image_index) 230 | # heuristic: keep an average of 40 detections per class per images prior 231 | # to NMS 232 | max_per_set = 40 * num_images 233 | # heuristic: keep at most 100 detection per class per image prior to NMS 234 | max_per_image = 100 235 | # detection thresold for each class (this is adaptively set based on the 236 | # max_per_set constraint) 237 | thresh = -np.inf * np.ones(imdb.num_classes) 238 | # top_scores will hold one minheap of scores per class (used to enforce 239 | # the max_per_set constraint) 240 | top_scores = [[] for _ in range(imdb.num_classes)] 241 | # all detections are collected into: 242 | # all_boxes[cls][image] = N x 5 array of detections in 243 | # (x1, y1, x2, y2, score) 244 | all_boxes = [[[] for _ in range(num_images)] 245 | for _ in range(imdb.num_classes)] 246 | 247 | #output_dir = get_output_dir(imdb, net) 248 | 249 | # timers 250 | _t = {'im_detect' : Timer(), 'misc' : Timer()} 251 | roidb = imdb.roidb 252 | 253 | if not boThresholdDetections: 254 | for i in range(num_images): 255 | if i % 1000 == 0: 256 | print (" Processing image {} of {}..".format(i, num_images)) 257 | scores, _, _ = im_detect(net, i, roidb[i]['boxes'], 
feature_scale=feature_scale, classifier=classifier) 258 | 259 | for j in range(1, imdb.num_classes): 260 | inds = np.where(roidb[i]['gt_classes'] == 0)[0] 261 | cls_scores = scores[inds, j] 262 | cls_boxes = roidb[i]['boxes'][inds] 263 | all_boxes[j][i] = \ 264 | np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 265 | .astype(np.float32, copy=False) 266 | 267 | else: 268 | for i in range(num_images): 269 | if i % 1000 == 0: 270 | print (" Processing image {} of {}..".format(i, num_images)) 271 | #im = cv2.imread(imdb.image_path_at(i)) 272 | #_t['im_detect'].tic() 273 | scores, _, _ = im_detect(net, i, roidb[i]['boxes'], feature_scale = feature_scale, classifier = classifier) 274 | #_t['im_detect'].toc() 275 | 276 | _t['misc'].tic() 277 | for j in range(1, imdb.num_classes): 278 | inds = np.where((scores[:, j] > thresh[j]) & 279 | (roidb[i]['gt_classes'] == 0))[0] 280 | cls_scores = scores[inds, j] 281 | 282 | # cls_boxes = boxes[inds, j * 4:(j + 1) * 4] 283 | boxes = roidb[i]['boxes'] 284 | cls_boxes = boxes[inds] 285 | 286 | top_inds = np.argsort(-cls_scores)[:max_per_image] 287 | cls_scores = cls_scores[top_inds] 288 | cls_boxes = cls_boxes[top_inds, :] 289 | # push new scores onto the minheap 290 | for val in cls_scores: 291 | heapq.heappush(top_scores[j], val) 292 | # if we've collected more than the max number of detection, 293 | # then pop items off the minheap and update the class threshold 294 | if len(top_scores[j]) > max_per_set: 295 | while len(top_scores[j]) > max_per_set: 296 | heapq.heappop(top_scores[j]) 297 | thresh[j] = top_scores[j][0] 298 | 299 | all_boxes[j][i] = \ 300 | np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 301 | .astype(np.float32, copy=False) 302 | 303 | #visualize rois 304 | if False: 305 | im = cv2.imread(imdb.image_path_at(i)) 306 | if boUsePythonImpl: 307 | keep = nmsPython.nms(all_boxes[j][i], 0.3) 308 | else: 309 | keep = nms(all_boxes[j][i], 0.3) 310 | vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) 311 | _t['misc'].toc() 312 | 313 | # print ('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ 314 | # .format(i + 1, num_images, _t['im_detect'].average_time, 315 | # _t['misc'].average_time)) 316 | 317 | #keep only the boxes with highest score for each class 318 | # shape of all_boxes: e.g. 
21 classes x 4952 images x 58 rois x 5 coord+score 319 | for j in range(1, imdb.num_classes): 320 | for i in range(num_images): 321 | inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] 322 | if len(inds) == 0: 323 | all_boxes[j][i] = [] 324 | else: 325 | all_boxes[j][i] = all_boxes[j][i][inds, :] 326 | 327 | if output_dir: 328 | det_file = os.path.join(output_dir, 'detections.pkl') 329 | with open(det_file, 'wb') as f: 330 | cp.dump(all_boxes, f, cp.HIGHEST_PROTOCOL) 331 | 332 | if boApplyNms: 333 | print ("Number of rois before non-maxima surpression: %d" % sum([len(all_boxes[i][j]) for i in range(imdb.num_classes) for j in range(imdb.num_images)])) 334 | nms_dets,_ = apply_nms(all_boxes, nmsThreshold, boUsePythonImpl) 335 | print ("Number of rois after non-maxima surpression: %d" % sum([len(nms_dets[i][j]) for i in range(imdb.num_classes) for j in range(imdb.num_images)])) 336 | else: 337 | print ("Skipping non-maxima surpression") 338 | nms_dets = all_boxes 339 | 340 | print ('Evaluating detections') 341 | imdb.evaluate_detections(nms_dets, output_dir) 342 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/train_svms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Train post-hoc SVMs using the algorithm and hyper-parameters from 12 | traditional R-CNN. 13 | """ 14 | 15 | from timer import Timer 16 | from sklearn import svm 17 | import numpy as np 18 | 19 | 20 | 21 | ################################################# 22 | # Slightly modified SVM training functions 23 | ################################################# 24 | class SVMTrainer(object): 25 | """ 26 | Trains post-hoc detection SVMs for all classes using the algorithm 27 | and hyper-parameters of traditional R-CNN. 
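    In outline: positives come from the ground-truth boxes, negatives are mined
    from proposals that score above hard_thresh but overlap the ground truth by
    less than neg_iou_thresh, and the learned weights are written back into the
    cls_score layer (see get_pos_examples, train_with_hard_negatives and
    update_net below).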
28 | """ 29 | 30 | def __init__(self, net, imdb, im_detect, svmWeightsPath, svmBiasPath, svmFeatScalePath, 31 | svm_C, svm_B, svm_nrEpochs, svm_retrainLimit, svm_evictThreshold, svm_posWeight, 32 | svm_targetNorm, svm_penality, svm_loss, svm_rngSeed): 33 | self.net = net 34 | self.imdb = imdb 35 | self.im_detect = im_detect 36 | self.svm_nrEpochs = svm_nrEpochs 37 | self.svm_targetNorm = svm_targetNorm 38 | self.svmWeightsPath = svmWeightsPath 39 | self.svmBiasPath = svmBiasPath 40 | self.svmFeatScalePath = svmFeatScalePath 41 | self.layer = 'fc7' 42 | self.hard_thresh = -1.0001 43 | self.neg_iou_thresh = 0.3 44 | dim = net.params['cls_score'][0].data.shape[1] 45 | self.feature_scale = self._get_feature_scale() 46 | print('Feature dim: {}'.format(dim)) 47 | print('Feature scale: {:.3f}'.format(self.feature_scale)) 48 | self.trainers = [SVMClassTrainer(cls, dim, self.feature_scale, svm_C, svm_B, svm_posWeight, svm_penality, svm_loss, 49 | svm_rngSeed, svm_retrainLimit, svm_evictThreshold) for cls in imdb.classes] 50 | 51 | 52 | def _get_feature_scale(self, num_images=100): 53 | _t = Timer() 54 | roidb = self.imdb.roidb 55 | total_norm = 0.0 56 | total_sum = 0.0 57 | count = 0.0 58 | num_images = min(num_images, self.imdb.num_images) 59 | inds = np.random.choice(range(self.imdb.num_images), size=num_images, replace=False) 60 | 61 | for i_, i in enumerate(inds): 62 | #im = cv2.imread(self.imdb.image_path_at(i)) 63 | #if roidb[i]['flipped']: 64 | # im = im[:, ::-1, :] 65 | #im = self.imdb.image_path_at(i) 66 | _t.tic() 67 | scores, boxes, feat = self.im_detect(self.net, i, roidb[i]['boxes'], boReturnClassifierScore = False) 68 | _t.toc() 69 | #feat = self.net.blobs[self.layer].data 70 | total_norm += np.sqrt((feat ** 2).sum(axis=1)).sum() 71 | total_sum += 1.0 * sum(sum(feat)) / len(feat) 72 | count += feat.shape[0] 73 | print('{}/{}: avg feature norm: {:.3f}, average value: {:.3f}'.format(i_ + 1, num_images, 74 | total_norm / count, total_sum / count)) 75 | 76 | return self.svm_targetNorm * 1.0 / (total_norm / count) 77 | 78 | def _get_pos_counts(self): 79 | counts = np.zeros((len(self.imdb.classes)), dtype=np.int) 80 | roidb = self.imdb.roidb 81 | for i in range(len(roidb)): 82 | for j in range(1, self.imdb.num_classes): 83 | I = np.where(roidb[i]['gt_classes'] == j)[0] 84 | counts[j] += len(I) 85 | 86 | for j in range(1, self.imdb.num_classes): 87 | print('class {:s} has {:d} positives'. 
88 | format(self.imdb.classes[j], counts[j])) 89 | 90 | return counts 91 | 92 | def get_pos_examples(self): 93 | counts = self._get_pos_counts() 94 | for i in range(len(counts)): 95 | self.trainers[i].alloc_pos(counts[i]) 96 | 97 | _t = Timer() 98 | roidb = self.imdb.roidb 99 | num_images = len(roidb) 100 | for i in range(num_images): 101 | #im = cv2.imread(self.imdb.image_path_at(i)) 102 | #if roidb[i]['flipped']: 103 | # im = im[:, ::-1, :] 104 | #im = self.imdb.image_path_at(i) 105 | gt_inds = np.where(roidb[i]['gt_classes'] > 0)[0] 106 | gt_boxes = roidb[i]['boxes'][gt_inds] 107 | _t.tic() 108 | scores, boxes, feat = self.im_detect(self.net, i, gt_boxes, self.feature_scale, gt_inds, boReturnClassifierScore = False) 109 | _t.toc() 110 | #feat = self.net.blobs[self.layer].data 111 | for j in range(1, self.imdb.num_classes): 112 | cls_inds = np.where(roidb[i]['gt_classes'][gt_inds] == j)[0] 113 | if len(cls_inds) > 0: 114 | cls_feat = feat[cls_inds, :] 115 | self.trainers[j].append_pos(cls_feat) 116 | if i % 50 == 0: 117 | print('get_pos_examples: {:d}/{:d} {:.3f}s' \ 118 | .format(i + 1, len(roidb), _t.average_time)) 119 | 120 | def initialize_net(self): 121 | # Start all SVM parameters at zero 122 | self.net.params['cls_score'][0].data[...] = 0 123 | self.net.params['cls_score'][1].data[...] = 0 124 | 125 | # Initialize SVMs in a smart way. Not doing this because its such 126 | # a good initialization that we might not learn something close to 127 | # the SVM solution. 128 | # # subtract background weights and biases for the foreground classes 129 | # w_bg = self.net.params['cls_score'][0].data[0, :] 130 | # b_bg = self.net.params['cls_score'][1].data[0] 131 | # self.net.params['cls_score'][0].data[1:, :] -= w_bg 132 | # self.net.params['cls_score'][1].data[1:] -= b_bg 133 | # # set the background weights and biases to 0 (where they shall remain) 134 | # self.net.params['cls_score'][0].data[0, :] = 0 135 | # self.net.params['cls_score'][1].data[0] = 0 136 | 137 | def update_net(self, cls_ind, w, b): 138 | self.net.params['cls_score'][0].data[cls_ind, :] = w 139 | self.net.params['cls_score'][1].data[cls_ind] = b 140 | 141 | def train_with_hard_negatives(self): 142 | _t = Timer() 143 | roidb = self.imdb.roidb 144 | num_images = len(roidb) 145 | 146 | for epoch in range(0,self.svm_nrEpochs): 147 | 148 | # num_images = 100 149 | for i in range(num_images): 150 | print("*** EPOCH = %d, IMAGE = %d *** " % (epoch, i)) 151 | #im = cv2.imread(self.imdb.image_path_at(i)) 152 | #if roidb[i]['flipped']: 153 | # im = im[:, ::-1, :] 154 | #im = self.imdb.image_path_at(i) 155 | _t.tic() 156 | scores, boxes, feat = self.im_detect(self.net, i, roidb[i]['boxes'], self.feature_scale) 157 | _t.toc() 158 | #feat = self.net.blobs[self.layer].data 159 | for j in range(1, self.imdb.num_classes): 160 | hard_inds = \ 161 | np.where((scores[:, j] > self.hard_thresh) & 162 | (roidb[i]['gt_overlaps'][:, j].toarray().ravel() < 163 | self.neg_iou_thresh))[0] 164 | if len(hard_inds) > 0: 165 | hard_feat = feat[hard_inds, :].copy() 166 | new_w_b = \ 167 | self.trainers[j].append_neg_and_retrain(feat=hard_feat) 168 | if new_w_b is not None: 169 | self.update_net(j, new_w_b[0], new_w_b[1]) 170 | np.savetxt(self.svmWeightsPath[:-4] + "_epoch" + str(epoch) + ".txt", self.net.params['cls_score'][0].data) 171 | np.savetxt(self.svmBiasPath[:-4] + "_epoch" + str(epoch) + ".txt", self.net.params['cls_score'][1].data) 172 | np.savetxt(self.svmFeatScalePath[:-4] + "_epoch" + str(epoch) + ".txt", [self.feature_scale]) 173 | 174 | 
print(('train_with_hard_negatives: ' 175 | '{:d}/{:d} {:.3f}s').format(i + 1, len(roidb), 176 | _t.average_time)) 177 | 178 | def train(self): 179 | # Initialize SVMs using 180 | # a. w_i = fc8_w_i - fc8_w_0 181 | # b. b_i = fc8_b_i - fc8_b_0 182 | # c. Install SVMs into net 183 | self.initialize_net() 184 | 185 | # Pass over roidb to count num positives for each class 186 | # a. Pre-allocate arrays for positive feature vectors 187 | # Pass over roidb, computing features for positives only 188 | self.get_pos_examples() 189 | 190 | # Pass over roidb 191 | # a. Compute cls_score with forward pass 192 | # b. For each class 193 | # i. Select hard negatives 194 | # ii. Add them to cache 195 | # c. For each class 196 | # i. If SVM retrain criteria met, update SVM 197 | # ii. Install new SVM into net 198 | self.train_with_hard_negatives() 199 | 200 | # One final SVM retraining for each class 201 | # Install SVMs into net 202 | for j in range(1, self.imdb.num_classes): 203 | new_w_b = self.trainers[j].append_neg_and_retrain(force=True) 204 | self.update_net(j, new_w_b[0], new_w_b[1]) 205 | 206 | #save svm 207 | np.savetxt(self.svmWeightsPath, self.net.params['cls_score'][0].data) 208 | np.savetxt(self.svmBiasPath, self.net.params['cls_score'][1].data) 209 | np.savetxt(self.svmFeatScalePath, [self.feature_scale]) 210 | 211 | 212 | class SVMClassTrainer(object): 213 | """Manages post-hoc SVM training for a single object class.""" 214 | 215 | def __init__(self, cls, dim, feature_scale, 216 | C, B, pos_weight, svm_penality, svm_loss, svm_rngSeed, svm_retrainLimit, svm_evictThreshold): 217 | self.pos = np.zeros((0, dim), dtype=np.float32) 218 | self.neg = np.zeros((0, dim), dtype=np.float32) 219 | self.B = B 220 | self.C = C 221 | self.cls = cls 222 | self.pos_weight = pos_weight 223 | self.dim = dim 224 | self.feature_scale = feature_scale 225 | if type(pos_weight) == str: #e.g. pos_weight == 'auto' 226 | class_weight = pos_weight 227 | else: 228 | class_weight = {1: pos_weight, -1: 1} 229 | 230 | self.svm = svm.LinearSVC(C=C, class_weight=class_weight, 231 | intercept_scaling=B, verbose=1, 232 | penalty=svm_penality, loss=svm_loss, 233 | random_state=svm_rngSeed, dual=True) 234 | 235 | self.pos_cur = 0 236 | self.num_neg_added = 0 237 | self.retrain_limit = svm_retrainLimit 238 | self.evict_thresh = svm_evictThreshold 239 | self.loss_history = [] 240 | 241 | def alloc_pos(self, count): 242 | self.pos_cur = 0 243 | self.pos = np.zeros((count, self.dim), dtype=np.float32) 244 | 245 | def append_pos(self, feat): 246 | num = feat.shape[0] 247 | self.pos[self.pos_cur:self.pos_cur + num, :] = feat 248 | self.pos_cur += num 249 | 250 | def train(self): 251 | print('>>> Updating {} detector <<<'.format(self.cls)) 252 | num_pos = self.pos.shape[0] 253 | num_neg = self.neg.shape[0] 254 | print('Cache holds {} pos examples and {} neg examples'. 
255 | format(num_pos, num_neg)) 256 | X = np.vstack((self.pos, self.neg)) * self.feature_scale 257 | y = np.hstack((np.ones(num_pos), 258 | -np.ones(num_neg))) 259 | self.svm.fit(X, y) 260 | w = self.svm.coef_ 261 | b = self.svm.intercept_[0] 262 | 263 | scores = self.svm.decision_function(X) 264 | pos_scores = scores[:num_pos] 265 | neg_scores = scores[num_pos:] 266 | 267 | num_neg_wrong = sum(neg_scores > 0) 268 | num_pos_wrong = sum(pos_scores < 0) 269 | meanAcc = 0.5 * (num_pos - num_pos_wrong) / num_pos + 0.5*(num_neg - num_neg_wrong) / num_neg 270 | if type(self.pos_weight) == str: 271 | pos_loss = 0 272 | else: 273 | pos_loss = (self.C * self.pos_weight * 274 | np.maximum(0, 1 - pos_scores).sum()) 275 | neg_loss = self.C * np.maximum(0, 1 + neg_scores).sum() 276 | reg_loss = 0.5 * np.dot(w.ravel(), w.ravel()) + 0.5 * b ** 2 277 | tot_loss = pos_loss + neg_loss + reg_loss 278 | self.loss_history.append((meanAcc, num_pos_wrong, num_pos, num_neg_wrong, num_neg, tot_loss, pos_loss, neg_loss, reg_loss)) 279 | for i, losses in enumerate(self.loss_history): 280 | print((' {:4d}: meanAcc={:.3f} -- pos wrong: {:5}/{:5}; neg wrong: {:5}/{:5}; ' 281 | ' obj val: {:.3f} = {:.3f} (posUnscaled) + {:.3f} (neg) + {:.3f} (reg)').format(i, *losses)) 282 | 283 | # Sanity check 284 | 285 | scores_ret = ( 286 | X * 1.0 / self.feature_scale).dot(w.T * self.feature_scale) + b 287 | assert np.allclose(scores, scores_ret[:, 0], atol=1e-5), \ 288 | "Scores from returned model don't match decision function" 289 | 290 | return ((w * self.feature_scale, b), pos_scores, neg_scores) 291 | 292 | def append_neg_and_retrain(self, feat=None, force=False): 293 | if feat is not None: 294 | num = feat.shape[0] 295 | self.neg = np.vstack((self.neg, feat)) 296 | self.num_neg_added += num 297 | if self.num_neg_added > self.retrain_limit or force: 298 | self.num_neg_added = 0 299 | new_w_b, pos_scores, neg_scores = self.train() 300 | # scores = np.dot(self.neg, new_w_b[0].T) + new_w_b[1] 301 | # easy_inds = np.where(neg_scores < self.evict_thresh)[0] 302 | print(' Pruning easy negatives') 303 | print(' before pruning: #neg = ' + str(len(self.neg))) 304 | not_easy_inds = np.where(neg_scores >= self.evict_thresh)[0] 305 | if len(not_easy_inds) > 0: 306 | self.neg = self.neg[not_easy_inds, :] 307 | # self.neg = np.delete(self.neg, easy_inds) 308 | print(' after pruning: #neg = ' + str(len(self.neg))) 309 | print(' Cache holds {} pos examples and {} neg examples'. 
310 | format(self.pos.shape[0], self.neg.shape[0])) 311 | print(' {} pos support vectors'.format((pos_scores <= 1).sum())) 312 | print(' {} neg support vectors'.format((neg_scores >= -1).sum())) 313 | return new_w_b 314 | else: 315 | return None 316 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/utils/cython_bbox.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Retail_ImageClassificationStockkeeping/fastRCNN/utils/cython_bbox.pyd -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/utils/cython_bbox.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Retail_ImageClassificationStockkeeping/fastRCNN/utils/cython_bbox.so -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/utils/cython_nms.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Retail_ImageClassificationStockkeeping/fastRCNN/utils/cython_nms.pyd -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/utils/cython_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Retail_ImageClassificationStockkeeping/fastRCNN/utils/cython_nms.so -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/fastRCNN/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | from __future__ import print_function 8 | import xml.etree.ElementTree as ET 9 | import os 10 | import pickle as cp 11 | import numpy as np 12 | 13 | def parse_rec(filename): 14 | """ Parse a PASCAL VOC xml file """ 15 | tree = ET.parse(filename) 16 | objects = [] 17 | for obj in tree.findall('object'): 18 | obj_struct = {} 19 | obj_struct['name'] = obj.find('name').text 20 | obj_struct['pose'] = obj.find('pose').text 21 | obj_struct['truncated'] = int(obj.find('truncated').text) 22 | obj_struct['difficult'] = int(obj.find('difficult').text) 23 | bbox = obj.find('bndbox') 24 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 25 | int(bbox.find('ymin').text), 26 | int(bbox.find('xmax').text), 27 | int(bbox.find('ymax').text)] 28 | objects.append(obj_struct) 29 | 30 | return objects 31 | 32 | def voc_ap(rec, prec, use_07_metric=False): 33 | """ ap = voc_ap(rec, prec, [use_07_metric]) 34 | Compute VOC AP given precision and recall. 35 | If use_07_metric is true, uses the 36 | VOC 07 11 point method (default:False). 37 | """ 38 | if use_07_metric: 39 | # 11 point metric 40 | ap = 0. 
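        # (i.e. average, over the recall thresholds 0.0, 0.1, ..., 1.0, the
        #  maximum precision achieved at recall >= that threshold)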
41 | for t in np.arange(0., 1.1, 0.1): 42 | if np.sum(rec >= t) == 0: 43 | p = 0 44 | else: 45 | p = np.max(prec[rec >= t]) 46 | ap = ap + p / 11. 47 | else: 48 | # correct AP calculation 49 | # first append sentinel values at the end 50 | mrec = np.concatenate(([0.], rec, [1.])) 51 | mpre = np.concatenate(([0.], prec, [0.])) 52 | 53 | # compute the precision envelope 54 | for i in range(mpre.size - 1, 0, -1): 55 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 56 | 57 | # to calculate area under PR curve, look for points 58 | # where X axis (recall) changes value 59 | i = np.where(mrec[1:] != mrec[:-1])[0] 60 | 61 | # and sum (\Delta recall) * prec 62 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 63 | return ap 64 | 65 | def voc_eval(detpath, 66 | annopath, 67 | imagesetfile, 68 | classname, 69 | cachedir, 70 | ovthresh=0.5, 71 | use_07_metric=False): 72 | """rec, prec, ap = voc_eval(detpath, 73 | annopath, 74 | imagesetfile, 75 | classname, 76 | [ovthresh], 77 | [use_07_metric]) 78 | 79 | Top level function that does the PASCAL VOC evaluation. 80 | 81 | detpath: Path to detections 82 | detpath.format(classname) should produce the detection results file. 83 | annopath: Path to annotations 84 | annopath.format(imagename) should be the xml annotations file. 85 | imagesetfile: Text file containing the list of images, one image per line. 86 | classname: Category name (duh) 87 | cachedir: Directory for caching the annotations 88 | [ovthresh]: Overlap threshold (default = 0.5) 89 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 90 | (default False) 91 | """ 92 | # assumes detections are in detpath.format(classname) 93 | # assumes annotations are in annopath.format(imagename) 94 | # assumes imagesetfile is a text file with each line an image name 95 | # cachedir caches the annotations in a pickle file 96 | 97 | # first load gt 98 | if cachedir: 99 | if not os.path.isdir(cachedir): 100 | os.mkdir(cachedir) 101 | cachefile = os.path.join(cachedir, 'annots.pkl') 102 | # read list of images 103 | with open(imagesetfile, 'r') as f: 104 | lines = f.readlines() 105 | imagenames = [x.strip() for x in lines] 106 | 107 | if not cachedir or not os.path.isfile(cachefile): 108 | # load annots 109 | recs = {} 110 | for i, imagename in enumerate(imagenames): 111 | recs[imagename] = parse_rec(annopath.format(imagename)) 112 | if i % 1000 == 0: 113 | print ('Reading annotation for {:d}/{:d}'.format( 114 | i + 1, len(imagenames))) 115 | # save 116 | if cachedir: 117 | print ('Saving cached annotations to {:s}'.format(cachefile)) 118 | with open(cachefile, 'wb') as f: 119 | cp.dump(recs, f) 120 | else: 121 | # load 122 | with open(cachefile, 'rb') as f: 123 | recs = cp.load(f) 124 | 125 | # extract gt objects for this class 126 | class_recs = {} 127 | npos = 0 128 | for imagename in imagenames: 129 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 130 | bbox = np.array([x['bbox'] for x in R]) 131 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 132 | det = [False] * len(R) 133 | npos = npos + sum(~difficult) 134 | class_recs[imagename] = {'bbox': bbox, 135 | 'difficult': difficult, 136 | 'det': det} 137 | 138 | # read dets 139 | detfile = detpath.format(classname) 140 | with open(detfile, 'r') as f: 141 | lines = f.readlines() 142 | 143 | splitlines = [x.strip().split(' ') for x in lines] 144 | image_ids = [x[0] for x in splitlines] 145 | confidence = np.array([float(x[1]) for x in splitlines]) 146 | BB = np.array([[float(z) for z in x[2:]] for x in 
splitlines]) 147 | 148 | # sort by confidence 149 | sorted_ind = np.argsort(-confidence) 150 | sorted_scores = np.sort(-confidence) 151 | 152 | BB = BB[sorted_ind, :] 153 | image_ids = [image_ids[x] for x in sorted_ind] 154 | 155 | # go down dets and mark TPs and FPs 156 | nd = len(image_ids) 157 | tp = np.zeros(nd) 158 | fp = np.zeros(nd) 159 | for d in range(nd): 160 | R = class_recs[image_ids[d]] 161 | bb = BB[d, :].astype(float) 162 | ovmax = -np.inf 163 | BBGT = R['bbox'].astype(float) 164 | 165 | if BBGT.size > 0: 166 | # compute overlaps 167 | # intersection 168 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 169 | iymin = np.maximum(BBGT[:, 1], bb[1]) 170 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 171 | iymax = np.minimum(BBGT[:, 3], bb[3]) 172 | iw = np.maximum(ixmax - ixmin + 1., 0.) 173 | ih = np.maximum(iymax - iymin + 1., 0.) 174 | inters = iw * ih 175 | 176 | # union 177 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 178 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 179 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 180 | 181 | overlaps = inters / uni 182 | ovmax = np.max(overlaps) 183 | jmax = np.argmax(overlaps) 184 | 185 | if ovmax > ovthresh: 186 | if not R['difficult'][jmax]: 187 | if not R['det'][jmax]: 188 | tp[d] = 1. 189 | R['det'][jmax] = 1 190 | else: 191 | fp[d] = 1. 192 | else: 193 | fp[d] = 1. 194 | 195 | # compute precision recall 196 | fp = np.cumsum(fp) 197 | tp = np.cumsum(tp) 198 | rec = tp / float(npos) 199 | # avoid divide by zero in case the first detection matches a difficult 200 | # ground truth 201 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 202 | ap = voc_ap(rec, prec, use_07_metric) 203 | 204 | return rec, prec, ap 205 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/selectivesearch/README.md: -------------------------------------------------------------------------------- 1 | # Selective Search 2 | 3 | This code is a revision of the selective search implementation at: 4 | https://github.com/AlpacaDB/selectivesearch, such that the code can work under Python 3 environment. 5 | 6 | This file is based on or incorporates material from the projects listed below (Third Party OSS). The original copyright notice and the license under which Microsoft received such Third Party OSS, are set forth below. Such licenses and notices are provided for informational purposes only. Microsoft licenses the Third Party OSS to you under the licensing terms for the Microsoft product or service. Microsoft reserves all other rights not expressly granted under this agreement, whether by implication, estoppel or otherwise. 7 | 8 | `alpacadb-selectivesearch` 9 | Copyright (c) 2015-2016 AlpacaDB 10 | Copyright (c) 2016 Oussama ENNAFII 11 | 12 | Provided for Informational Purposes Only 13 | 14 | MIT License 15 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the Software), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/selectivesearch/__init__.py: -------------------------------------------------------------------------------- 1 | from .selectivesearch import selective_search 2 | 3 | -------------------------------------------------------------------------------- /Retail_ImageClassificationStockkeeping/selectivesearch/selectivesearch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | from past.builtins import cmp 4 | import skimage.io 5 | import skimage.feature 6 | import skimage.color 7 | import skimage.transform 8 | import skimage.util 9 | import skimage.segmentation 10 | import numpy 11 | 12 | 13 | # "Selective Search for Object Recognition" by J.R.R. Uijlings et al. 14 | # 15 | # - Modified version with LBP extractor for texture vectorization 16 | 17 | 18 | def _generate_segments(im_orig, scale, sigma, min_size): 19 | """ 20 | segment smallest regions by the algorithm of Felzenswalb and 21 | Huttenlocher 22 | """ 23 | 24 | # open the Image 25 | im_mask = skimage.segmentation.felzenszwalb( 26 | skimage.util.img_as_float(im_orig), scale=scale, sigma=sigma, 27 | min_size=min_size) 28 | 29 | # merge mask channel to the image as a 4th channel 30 | im_orig = numpy.append( 31 | im_orig, numpy.zeros(im_orig.shape[:2])[:, :, numpy.newaxis], axis=2) 32 | im_orig[:, :, 3] = im_mask 33 | 34 | return im_orig 35 | 36 | 37 | def _sim_colour(r1, r2): 38 | """ 39 | calculate the sum of histogram intersection of colour 40 | """ 41 | return sum([min(a, b) for a, b in zip(r1["hist_c"], r2["hist_c"])]) 42 | 43 | 44 | def _sim_texture(r1, r2): 45 | """ 46 | calculate the sum of histogram intersection of texture 47 | """ 48 | return sum([min(a, b) for a, b in zip(r1["hist_t"], r2["hist_t"])]) 49 | 50 | 51 | def _sim_size(r1, r2, imsize): 52 | """ 53 | calculate the size similarity over the image 54 | """ 55 | return 1.0 - (r1["size"] + r2["size"]) / imsize 56 | 57 | 58 | def _sim_fill(r1, r2, imsize): 59 | """ 60 | calculate the fill similarity over the image 61 | """ 62 | bbsize = ( 63 | (max(r1["max_x"], r2["max_x"]) - min(r1["min_x"], r2["min_x"])) 64 | * (max(r1["max_y"], r2["max_y"]) - min(r1["min_y"], r2["min_y"])) 65 | ) 66 | return 1.0 - (bbsize - r1["size"] - r2["size"]) / imsize 67 | 68 | 69 | def _calc_sim(r1, r2, imsize): 70 | return (_sim_colour(r1, r2) + _sim_texture(r1, r2) 71 | + _sim_size(r1, r2, imsize) + _sim_fill(r1, r2, imsize)) 72 | 73 | 74 | def _calc_colour_hist(img): 75 | """ 76 | calculate colour histogram for each region 77 | 78 | the size of output histogram will be BINS * COLOUR_CHANNELS(3) 79 | 80 | number of bins is 25 as same as [uijlings_ijcv2013_draft.pdf] 81 | 82 | extract HSV 83 | """ 84 | 85 | BINS = 25 86 | hist = numpy.array([]) 87 | 88 | for colour_channel in (0, 1, 2): 89 | 90 | # extracting one colour channel 91 | c = img[:, colour_channel] 92 | 93 | # calculate histogram for each colour and join to the result 
94 | hist = numpy.concatenate( 95 | [hist] + [numpy.histogram(c, BINS, (0.0, 255.0))[0]]) 96 | 97 | # L1 normalize 98 | hist = hist / len(img) 99 | 100 | return hist 101 | 102 | 103 | def _calc_texture_gradient(img): 104 | """ 105 | calculate texture gradient for entire image 106 | 107 | The original SelectiveSearch algorithm proposed Gaussian derivative 108 | for 8 orientations, but we use LBP instead. 109 | 110 | output will be [height(*)][width(*)] 111 | """ 112 | ret = numpy.zeros((img.shape[0], img.shape[1], img.shape[2])) 113 | 114 | for colour_channel in (0, 1, 2): 115 | ret[:, :, colour_channel] = skimage.feature.local_binary_pattern( 116 | img[:, :, colour_channel], 8, 1.0) 117 | 118 | return ret 119 | 120 | 121 | def _calc_texture_hist(img): 122 | """ 123 | calculate texture histogram for each region 124 | 125 | calculate the histogram of gradient for each colours 126 | the size of output histogram will be 127 | BINS * ORIENTATIONS * COLOUR_CHANNELS(3) 128 | """ 129 | BINS = 10 130 | 131 | hist = numpy.array([]) 132 | 133 | for colour_channel in (0, 1, 2): 134 | 135 | # mask by the colour channel 136 | fd = img[:, colour_channel] 137 | 138 | # calculate histogram for each orientation and concatenate them all 139 | # and join to the result 140 | hist = numpy.concatenate( 141 | [hist] + [numpy.histogram(fd, BINS, (0.0, 1.0))[0]]) 142 | 143 | # L1 Normalize 144 | hist = hist / len(img) 145 | 146 | return hist 147 | 148 | 149 | def _extract_regions(img): 150 | 151 | R = {} 152 | 153 | # get hsv image 154 | hsv = skimage.color.rgb2hsv(img[:, :, :3]) 155 | 156 | # pass 1: count pixel positions 157 | for y, i in enumerate(img): 158 | 159 | for x, (r, g, b, l) in enumerate(i): 160 | 161 | # initialize a new region 162 | if l not in R: 163 | R[l] = { 164 | "min_x": 0xffff, "min_y": 0xffff, 165 | "max_x": 0, "max_y": 0, "labels": [l]} 166 | 167 | # bounding box 168 | if R[l]["min_x"] > x: 169 | R[l]["min_x"] = x 170 | if R[l]["min_y"] > y: 171 | R[l]["min_y"] = y 172 | if R[l]["max_x"] < x: 173 | R[l]["max_x"] = x 174 | if R[l]["max_y"] < y: 175 | R[l]["max_y"] = y 176 | 177 | # pass 2: calculate texture gradient 178 | tex_grad = _calc_texture_gradient(img) 179 | 180 | # pass 3: calculate colour histogram of each region 181 | for k, v in R.items(): 182 | 183 | # colour histogram 184 | masked_pixels = hsv[:, :, :][img[:, :, 3] == k] 185 | R[k]["size"] = len(masked_pixels / 4) 186 | R[k]["hist_c"] = _calc_colour_hist(masked_pixels) 187 | 188 | # texture histogram 189 | R[k]["hist_t"] = _calc_texture_hist(tex_grad[:, :][img[:, :, 3] == k]) 190 | 191 | return R 192 | 193 | 194 | def _extract_neighbours(regions): 195 | 196 | def intersect(a, b): 197 | if (a["min_x"] < b["min_x"] < a["max_x"] 198 | and a["min_y"] < b["min_y"] < a["max_y"]) or ( 199 | a["min_x"] < b["max_x"] < a["max_x"] 200 | and a["min_y"] < b["max_y"] < a["max_y"]) or ( 201 | a["min_x"] < b["min_x"] < a["max_x"] 202 | and a["min_y"] < b["max_y"] < a["max_y"]) or ( 203 | a["min_x"] < b["max_x"] < a["max_x"] 204 | and a["min_y"] < b["min_y"] < a["max_y"]): 205 | return True 206 | return False 207 | 208 | R = list(regions.items()) 209 | neighbours = [] 210 | for cur, a in enumerate(R[:-1]): 211 | for b in R[cur + 1:]: 212 | if intersect(a[1], b[1]): 213 | neighbours.append((a, b)) 214 | 215 | return neighbours 216 | 217 | 218 | def _merge_regions(r1, r2): 219 | new_size = r1["size"] + r2["size"] 220 | rt = { 221 | "min_x": min(r1["min_x"], r2["min_x"]), 222 | "min_y": min(r1["min_y"], r2["min_y"]), 223 | "max_x": max(r1["max_x"], 
r2["max_x"]), 224 | "max_y": max(r1["max_y"], r2["max_y"]), 225 | "size": new_size, 226 | "hist_c": ( 227 | r1["hist_c"] * r1["size"] + r2["hist_c"] * r2["size"]) / new_size, 228 | "hist_t": ( 229 | r1["hist_t"] * r1["size"] + r2["hist_t"] * r2["size"]) / new_size, 230 | "labels": r1["labels"] + r2["labels"] 231 | } 232 | return rt 233 | 234 | def mycmp(x, y): 235 | return cmp(x[1],y[1]) 236 | 237 | def cmp_to_key(mycmp): 238 | 'Convert a cmp= function into a key= function' 239 | class K(object): 240 | def __init__(self, obj, *args): 241 | self.obj = obj 242 | def __lt__(self, other): 243 | return mycmp(self.obj, other.obj) < 0 244 | def __gt__(self, other): 245 | return mycmp(self.obj, other.obj) > 0 246 | def __eq__(self, other): 247 | return mycmp(self.obj, other.obj) == 0 248 | def __le__(self, other): 249 | return mycmp(self.obj, other.obj) <= 0 250 | def __ge__(self, other): 251 | return mycmp(self.obj, other.obj) >= 0 252 | def __ne__(self, other): 253 | return mycmp(self.obj, other.obj) != 0 254 | return K 255 | 256 | def selective_search( 257 | im_orig, scale=1.0, sigma=0.8, min_size=50): 258 | '''Selective Search 259 | 260 | Parameters 261 | ---------- 262 | im_orig : ndarray 263 | Input image 264 | scale : int 265 | Free parameter. Higher means larger clusters in felzenszwalb segmentation. 266 | sigma : float 267 | Width of Gaussian kernel for felzenszwalb segmentation. 268 | min_size : int 269 | Minimum component size for felzenszwalb segmentation. 270 | Returns 271 | ------- 272 | img : ndarray 273 | image with region label 274 | region label is stored in the 4th value of each pixel [r,g,b,(region)] 275 | regions : array of dict 276 | [ 277 | { 278 | 'rect': (left, top, right, bottom), 279 | 'labels': [...] 280 | }, 281 | ... 282 | ] 283 | ''' 284 | assert im_orig.shape[2] == 3, "3ch image is expected" 285 | 286 | # load image and get smallest regions 287 | # region label is stored in the 4th value of each pixel [r,g,b,(region)] 288 | img = _generate_segments(im_orig, scale, sigma, min_size) 289 | 290 | if img is None: 291 | return None, {} 292 | 293 | imsize = img.shape[0] * img.shape[1] 294 | R = _extract_regions(img) 295 | 296 | # extract neighbouring information 297 | neighbours = list(_extract_neighbours(R)) 298 | 299 | # calculate initial similarities 300 | S = {} 301 | for (ai, ar), (bi, br) in neighbours: 302 | S[(ai, bi)] = _calc_sim(ar, br, imsize) 303 | 304 | # hierarchal search 305 | while S != {}: 306 | 307 | # get highest similarity 308 | if sys.version_info[0] < 3: 309 | i, j = sorted(S.items(), cmp=mycmp)[-1][0] 310 | else: 311 | i, j = sorted(S.items(), key=cmp_to_key(mycmp))[-1][0] 312 | 313 | # merge corresponding regions 314 | t = max(R.keys()) + 1.0 315 | R[t] = _merge_regions(R[i], R[j]) 316 | 317 | # mark similarities for regions to be removed 318 | key_to_delete = [] 319 | for k, v in S.items(): 320 | if (i in k) or (j in k): 321 | key_to_delete.append(k) 322 | 323 | # remove old similarities of related regions 324 | for k in key_to_delete: 325 | del S[k] 326 | 327 | # calculate similarity set with the new region 328 | for k in filter(lambda a: a != (i, j), key_to_delete): 329 | n = k[1] if k[0] in (i, j) else k[0] 330 | S[(t, n)] = _calc_sim(R[t], R[n], imsize) 331 | 332 | regions = [] 333 | for k, r in R.items(): 334 | regions.append({ 335 | 'rect': ( 336 | r['min_x'], r['min_y'], 337 | r['max_x'] - r['min_x'], r['max_y'] - r['min_y']), 338 | 'size': r['size'], 339 | 'labels': r['labels'] 340 | }) 341 | 342 | return img, regions 343 | 
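Note: a minimal usage sketch for this module follows (illustrative only -- the image path, the felzenszwalb parameters and the size filter are placeholder choices, not values taken from this repository):

import skimage.io
from selectivesearch import selective_search

# load an image and keep the three colour channels expected by selective_search
img = skimage.io.imread('shelf_photo.jpg')[:, :, :3]

# run the search; a larger scale gives larger initial felzenszwalb clusters
label_img, regions = selective_search(img, scale=500, sigma=0.9, min_size=10)

# collect distinct, non-degenerate candidate rectangles
candidates = set()
for r in regions:
    x, y, w, h = r['rect']  # assembled as (x, y, width, height) at the end of selective_search above
    if r['size'] < 200 or w == 0 or h == 0:
        continue
    candidates.add(r['rect'])
print('kept {} candidate regions'.format(len(candidates)))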
-------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/JupyterNotebooks/SmartStoplist.txt: -------------------------------------------------------------------------------- 1 | #stop word list from SMART (Salton,1971). Available at ftp://ftp.cs.cornell.edu/pub/smart/english.stop 2 | a 3 | a's 4 | able 5 | about 6 | above 7 | according 8 | accordingly 9 | across 10 | actually 11 | after 12 | afterwards 13 | again 14 | against 15 | ain't 16 | all 17 | allow 18 | allows 19 | almost 20 | alone 21 | along 22 | already 23 | also 24 | although 25 | always 26 | am 27 | among 28 | amongst 29 | an 30 | and 31 | another 32 | any 33 | anybody 34 | anyhow 35 | anyone 36 | anything 37 | anyway 38 | anyways 39 | anywhere 40 | apart 41 | appear 42 | appreciate 43 | appropriate 44 | are 45 | aren't 46 | around 47 | as 48 | aside 49 | ask 50 | asking 51 | associated 52 | at 53 | available 54 | away 55 | awfully 56 | b 57 | be 58 | became 59 | because 60 | become 61 | becomes 62 | becoming 63 | been 64 | before 65 | beforehand 66 | behind 67 | being 68 | believe 69 | below 70 | beside 71 | besides 72 | best 73 | better 74 | between 75 | beyond 76 | both 77 | brief 78 | but 79 | by 80 | c 81 | c'mon 82 | c's 83 | came 84 | can 85 | can't 86 | cannot 87 | cant 88 | cause 89 | causes 90 | certain 91 | certainly 92 | changes 93 | clearly 94 | co 95 | com 96 | come 97 | comes 98 | concerning 99 | consequently 100 | consider 101 | considering 102 | contain 103 | containing 104 | contains 105 | corresponding 106 | could 107 | couldn't 108 | course 109 | currently 110 | d 111 | definitely 112 | described 113 | despite 114 | did 115 | didn't 116 | different 117 | do 118 | does 119 | doesn't 120 | doing 121 | don't 122 | done 123 | down 124 | downwards 125 | during 126 | e 127 | each 128 | edu 129 | eg 130 | eight 131 | either 132 | else 133 | elsewhere 134 | enough 135 | entirely 136 | especially 137 | et 138 | etc 139 | even 140 | ever 141 | every 142 | everybody 143 | everyone 144 | everything 145 | everywhere 146 | ex 147 | exactly 148 | example 149 | except 150 | f 151 | far 152 | few 153 | fifth 154 | first 155 | five 156 | followed 157 | following 158 | follows 159 | for 160 | former 161 | formerly 162 | forth 163 | four 164 | from 165 | further 166 | furthermore 167 | g 168 | get 169 | gets 170 | getting 171 | given 172 | gives 173 | go 174 | goes 175 | going 176 | gone 177 | got 178 | gotten 179 | greetings 180 | h 181 | had 182 | hadn't 183 | happens 184 | hardly 185 | has 186 | hasn't 187 | have 188 | haven't 189 | having 190 | he 191 | he's 192 | hello 193 | help 194 | hence 195 | her 196 | here 197 | here's 198 | hereafter 199 | hereby 200 | herein 201 | hereupon 202 | hers 203 | herself 204 | hi 205 | him 206 | himself 207 | his 208 | hither 209 | hopefully 210 | how 211 | howbeit 212 | however 213 | i 214 | i'd 215 | i'll 216 | i'm 217 | i've 218 | ie 219 | if 220 | ignored 221 | immediate 222 | in 223 | inasmuch 224 | inc 225 | indeed 226 | indicate 227 | indicated 228 | indicates 229 | inner 230 | insofar 231 | instead 232 | into 233 | inward 234 | is 235 | isn't 236 | it 237 | it'd 238 | it'll 239 | it's 240 | its 241 | itself 242 | j 243 | just 244 | k 245 | keep 246 | keeps 247 | kept 248 | know 249 | knows 250 | known 251 | l 252 | last 253 | lately 254 | later 255 | latter 256 | latterly 257 | least 258 | less 259 | lest 260 | let 261 | let's 262 | like 263 | liked 264 | likely 265 | little 266 | look 267 | looking 268 | looks 269 | ltd 
270 | m 271 | mainly 272 | many 273 | may 274 | maybe 275 | me 276 | mean 277 | meanwhile 278 | merely 279 | might 280 | more 281 | moreover 282 | most 283 | mostly 284 | much 285 | must 286 | my 287 | myself 288 | n 289 | name 290 | namely 291 | nd 292 | near 293 | nearly 294 | necessary 295 | need 296 | needs 297 | neither 298 | never 299 | nevertheless 300 | new 301 | next 302 | nine 303 | no 304 | nobody 305 | non 306 | none 307 | noone 308 | nor 309 | normally 310 | not 311 | nothing 312 | novel 313 | now 314 | nowhere 315 | o 316 | obviously 317 | of 318 | off 319 | often 320 | oh 321 | ok 322 | okay 323 | old 324 | on 325 | once 326 | one 327 | ones 328 | only 329 | onto 330 | or 331 | other 332 | others 333 | otherwise 334 | ought 335 | our 336 | ours 337 | ourselves 338 | out 339 | outside 340 | over 341 | overall 342 | own 343 | p 344 | particular 345 | particularly 346 | per 347 | perhaps 348 | placed 349 | please 350 | plus 351 | possible 352 | presumably 353 | probably 354 | provides 355 | q 356 | que 357 | quite 358 | qv 359 | r 360 | rather 361 | rd 362 | re 363 | really 364 | reasonably 365 | regarding 366 | regardless 367 | regards 368 | relatively 369 | respectively 370 | right 371 | s 372 | said 373 | same 374 | saw 375 | say 376 | saying 377 | says 378 | second 379 | secondly 380 | see 381 | seeing 382 | seem 383 | seemed 384 | seeming 385 | seems 386 | seen 387 | self 388 | selves 389 | sensible 390 | sent 391 | serious 392 | seriously 393 | seven 394 | several 395 | shall 396 | she 397 | should 398 | shouldn't 399 | since 400 | six 401 | so 402 | some 403 | somebody 404 | somehow 405 | someone 406 | something 407 | sometime 408 | sometimes 409 | somewhat 410 | somewhere 411 | soon 412 | sorry 413 | specified 414 | specify 415 | specifying 416 | still 417 | sub 418 | such 419 | sup 420 | sure 421 | t 422 | t's 423 | take 424 | taken 425 | tell 426 | tends 427 | th 428 | than 429 | thank 430 | thanks 431 | thanx 432 | that 433 | that's 434 | thats 435 | the 436 | their 437 | theirs 438 | them 439 | themselves 440 | then 441 | thence 442 | there 443 | there's 444 | thereafter 445 | thereby 446 | therefore 447 | therein 448 | theres 449 | thereupon 450 | these 451 | they 452 | they'd 453 | they'll 454 | they're 455 | they've 456 | think 457 | third 458 | this 459 | thorough 460 | thoroughly 461 | those 462 | though 463 | three 464 | through 465 | throughout 466 | thru 467 | thus 468 | to 469 | together 470 | too 471 | took 472 | toward 473 | towards 474 | tried 475 | tries 476 | truly 477 | try 478 | trying 479 | twice 480 | two 481 | u 482 | un 483 | under 484 | unfortunately 485 | unless 486 | unlikely 487 | until 488 | unto 489 | up 490 | upon 491 | us 492 | use 493 | used 494 | useful 495 | uses 496 | using 497 | usually 498 | uucp 499 | v 500 | value 501 | various 502 | very 503 | via 504 | viz 505 | vs 506 | w 507 | want 508 | wants 509 | was 510 | wasn't 511 | way 512 | we 513 | we'd 514 | we'll 515 | we're 516 | we've 517 | welcome 518 | well 519 | went 520 | were 521 | weren't 522 | what 523 | what's 524 | whatever 525 | when 526 | whence 527 | whenever 528 | where 529 | where's 530 | whereafter 531 | whereas 532 | whereby 533 | wherein 534 | whereupon 535 | wherever 536 | whether 537 | which 538 | while 539 | whither 540 | who 541 | who's 542 | whoever 543 | whole 544 | whom 545 | whose 546 | why 547 | will 548 | willing 549 | wish 550 | with 551 | within 552 | without 553 | won't 554 | wonder 555 | would 556 | would 557 | wouldn't 558 | x 559 | y 560 | yes 561 
| yet 562 | you 563 | you'd 564 | you'll 565 | you're 566 | you've 567 | your 568 | yours 569 | yourself 570 | yourselves 571 | z 572 | zero 573 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/JupyterNotebooks/SmartStoplist_extended.txt: -------------------------------------------------------------------------------- 1 | #stop word list from SMART (Salton,1971). Available at ftp://ftp.cs.cornell.edu/pub/smart/english.stop 2 | a 3 | a's 4 | able 5 | about 6 | above 7 | according 8 | accordingly 9 | across 10 | actually 11 | after 12 | afterwards 13 | again 14 | against 15 | ain't 16 | all 17 | allow 18 | allows 19 | almost 20 | alone 21 | along 22 | already 23 | also 24 | although 25 | always 26 | am 27 | among 28 | amongst 29 | an 30 | and 31 | another 32 | any 33 | anybody 34 | anyhow 35 | anyone 36 | anything 37 | anyway 38 | anyways 39 | anywhere 40 | apart 41 | appear 42 | appreciate 43 | appropriate 44 | are 45 | aren't 46 | around 47 | as 48 | aside 49 | ask 50 | asking 51 | associated 52 | at 53 | available 54 | away 55 | awfully 56 | b 57 | be 58 | became 59 | because 60 | become 61 | becomes 62 | becoming 63 | been 64 | before 65 | beforehand 66 | behind 67 | being 68 | believe 69 | below 70 | beside 71 | besides 72 | best 73 | better 74 | between 75 | beyond 76 | both 77 | brief 78 | but 79 | by 80 | c 81 | c'mon 82 | c's 83 | came 84 | can 85 | can't 86 | cannot 87 | cant 88 | cause 89 | causes 90 | certain 91 | certainly 92 | changes 93 | clearly 94 | co 95 | com 96 | come 97 | comes 98 | concerning 99 | consequently 100 | consider 101 | considering 102 | contain 103 | containing 104 | contains 105 | corresponding 106 | could 107 | couldn't 108 | course 109 | currently 110 | d 111 | definitely 112 | described 113 | despite 114 | did 115 | didn't 116 | different 117 | do 118 | does 119 | doesn't 120 | doing 121 | don't 122 | done 123 | down 124 | downwards 125 | during 126 | e 127 | each 128 | edu 129 | eg 130 | eight 131 | either 132 | else 133 | elsewhere 134 | enough 135 | entirely 136 | especially 137 | et 138 | etc 139 | even 140 | ever 141 | every 142 | everybody 143 | everyone 144 | everything 145 | everywhere 146 | ex 147 | exactly 148 | example 149 | except 150 | f 151 | far 152 | few 153 | fifth 154 | first 155 | five 156 | followed 157 | following 158 | follows 159 | for 160 | former 161 | formerly 162 | forth 163 | four 164 | from 165 | further 166 | furthermore 167 | g 168 | get 169 | gets 170 | getting 171 | given 172 | gives 173 | go 174 | goes 175 | going 176 | gone 177 | got 178 | gotten 179 | greetings 180 | h 181 | had 182 | hadn't 183 | happens 184 | hardly 185 | has 186 | hasn't 187 | have 188 | haven't 189 | having 190 | he 191 | he's 192 | hello 193 | help 194 | hence 195 | her 196 | here 197 | here's 198 | hereafter 199 | hereby 200 | herein 201 | hereupon 202 | hers 203 | herself 204 | hi 205 | him 206 | himself 207 | his 208 | hither 209 | hopefully 210 | how 211 | howbeit 212 | however 213 | i 214 | i'd 215 | i'll 216 | i'm 217 | i've 218 | ie 219 | if 220 | ignored 221 | immediate 222 | in 223 | inasmuch 224 | inc 225 | indeed 226 | indicate 227 | indicated 228 | indicates 229 | inner 230 | insofar 231 | instead 232 | into 233 | inward 234 | is 235 | isn't 236 | it 237 | it'd 238 | it'll 239 | it's 240 | its 241 | itself 242 | j 243 | just 244 | k 245 | keep 246 | keeps 247 | kept 248 | know 249 | knows 250 | known 251 | l 252 | last 253 | lately 254 | later 255 | latter 256 | 
latterly 257 | least 258 | less 259 | lest 260 | let 261 | let's 262 | like 263 | liked 264 | likely 265 | little 266 | look 267 | looking 268 | looks 269 | ltd 270 | m 271 | mainly 272 | many 273 | may 274 | maybe 275 | me 276 | mean 277 | meanwhile 278 | merely 279 | might 280 | more 281 | moreover 282 | most 283 | mostly 284 | much 285 | must 286 | my 287 | myself 288 | n 289 | name 290 | namely 291 | nd 292 | near 293 | nearly 294 | necessary 295 | need 296 | needs 297 | neither 298 | never 299 | nevertheless 300 | new 301 | next 302 | nine 303 | no 304 | nobody 305 | non 306 | none 307 | noone 308 | nor 309 | normally 310 | not 311 | nothing 312 | novel 313 | now 314 | nowhere 315 | o 316 | obviously 317 | of 318 | off 319 | often 320 | oh 321 | ok 322 | okay 323 | old 324 | on 325 | once 326 | one 327 | ones 328 | only 329 | onto 330 | or 331 | other 332 | others 333 | otherwise 334 | ought 335 | our 336 | ours 337 | ourselves 338 | out 339 | outside 340 | over 341 | overall 342 | own 343 | p 344 | particular 345 | particularly 346 | per 347 | perhaps 348 | placed 349 | please 350 | plus 351 | possible 352 | presumably 353 | probably 354 | provides 355 | q 356 | que 357 | quite 358 | qv 359 | r 360 | rather 361 | rd 362 | re 363 | really 364 | reasonably 365 | regarding 366 | regardless 367 | regards 368 | relatively 369 | respectively 370 | right 371 | s 372 | said 373 | same 374 | saw 375 | say 376 | saying 377 | says 378 | second 379 | secondly 380 | see 381 | seeing 382 | seem 383 | seemed 384 | seeming 385 | seems 386 | seen 387 | self 388 | selves 389 | sensible 390 | sent 391 | serious 392 | seriously 393 | seven 394 | several 395 | shall 396 | she 397 | should 398 | shouldn't 399 | since 400 | six 401 | so 402 | some 403 | somebody 404 | somehow 405 | someone 406 | something 407 | sometime 408 | sometimes 409 | somewhat 410 | somewhere 411 | soon 412 | sorry 413 | specified 414 | specify 415 | specifying 416 | still 417 | sub 418 | such 419 | sup 420 | sure 421 | t 422 | t's 423 | take 424 | taken 425 | tell 426 | tends 427 | th 428 | than 429 | thank 430 | thanks 431 | thanx 432 | that 433 | that's 434 | thats 435 | the 436 | their 437 | theirs 438 | them 439 | themselves 440 | then 441 | thence 442 | there 443 | there's 444 | thereafter 445 | thereby 446 | therefore 447 | therein 448 | theres 449 | thereupon 450 | these 451 | they 452 | they'd 453 | they'll 454 | they're 455 | they've 456 | think 457 | third 458 | this 459 | thorough 460 | thoroughly 461 | those 462 | though 463 | three 464 | through 465 | throughout 466 | thru 467 | thus 468 | to 469 | together 470 | too 471 | took 472 | toward 473 | towards 474 | tried 475 | tries 476 | truly 477 | try 478 | trying 479 | twice 480 | two 481 | u 482 | un 483 | under 484 | unfortunately 485 | unless 486 | unlikely 487 | until 488 | unto 489 | up 490 | upon 491 | us 492 | use 493 | used 494 | useful 495 | uses 496 | using 497 | usually 498 | uucp 499 | v 500 | value 501 | various 502 | very 503 | via 504 | viz 505 | vs 506 | w 507 | want 508 | wants 509 | was 510 | wasn't 511 | way 512 | we 513 | we'd 514 | we'll 515 | we're 516 | we've 517 | welcome 518 | well 519 | went 520 | were 521 | weren't 522 | what 523 | what's 524 | whatever 525 | when 526 | whence 527 | whenever 528 | where 529 | where's 530 | whereafter 531 | whereas 532 | whereby 533 | wherein 534 | whereupon 535 | wherever 536 | whether 537 | which 538 | while 539 | whither 540 | who 541 | who's 542 | whoever 543 | whole 544 | whom 545 | whose 546 | why 547 | 
will 548 | willing 549 | wish 550 | with 551 | within 552 | without 553 | won't 554 | wonder 555 | would 556 | would 557 | wouldn't 558 | x 559 | y 560 | yes 561 | yet 562 | you 563 | you'd 564 | you'll 565 | you're 566 | you've 567 | your 568 | yours 569 | yourself 570 | yourselves 571 | z 572 | zero 573 | ################### 574 | section 575 | subsection 576 | sections 577 | subsections 578 | chapter 579 | chapters 580 | example 581 | paragraph 582 | paragraphs 583 | regard 584 | clause 585 | subclause 586 | case 587 | subparagraph 588 | subparagraphs 589 | i 590 | ii 591 | iii 592 | iv 593 | v 594 | vi 595 | vii 596 | viii 597 | ix 598 | x 599 | 600 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/JupyterNotebooks/rake.py: -------------------------------------------------------------------------------- 1 | # Implementation of RAKE - Rapid Automtic Keyword Exraction algorithm 2 | # as described in: 3 | # Rose, S., D. Engel, N. Cramer, and W. Cowley (2010). 4 | # Automatic keyword extraction from indi-vidual documents. 5 | # In M. W. Berry and J. Kogan (Eds.), Text Mining: Applications and Theory.unknown: John Wiley and Sons, Ltd. 6 | 7 | import re 8 | import operator 9 | 10 | debug = False 11 | test = False 12 | 13 | 14 | def is_number(s): 15 | try: 16 | float(s) if '.' in s else int(s) 17 | return True 18 | except ValueError: 19 | return False 20 | 21 | 22 | def load_stop_words(stop_word_file): 23 | """ 24 | Utility function to load stop words from a file and return as a list of words 25 | @param stop_word_file Path and file name of a file containing stop words. 26 | @return list A list of stop words. 27 | """ 28 | stop_words = [] 29 | for line in open(stop_word_file): 30 | if line.strip()[0:1] != "#": 31 | for word in line.split(): # in case more than one per line 32 | stop_words.append(word) 33 | return stop_words 34 | 35 | 36 | def separate_words(text, min_word_return_size): 37 | """ 38 | Utility function to return a list of all words that are have a length greater than a specified number of characters. 39 | @param text The text that must be split in to words. 40 | @param min_word_return_size The minimum no of characters a word must have to be included. 41 | """ 42 | splitter = re.compile('[^a-zA-Z0-9_\\+\\-/]') 43 | words = [] 44 | for single_word in splitter.split(text): 45 | current_word = single_word.strip().lower() 46 | #leave numbers in phrase, but don't count as words, since they tend to invalidate scores of their phrases 47 | if len(current_word) > min_word_return_size and current_word != '' and not is_number(current_word): 48 | words.append(current_word) 49 | return words 50 | 51 | 52 | def split_sentences(text): 53 | """ 54 | Utility function to return a list of sentences. 55 | @param text The text that must be split in to sentences. 
56 | """ 57 | sentence_delimiters = re.compile(u'[.!?,;:\t\\\\"\\(\\)\\\'\u2019\u2013]|\\s\\-\\s') 58 | sentences = sentence_delimiters.split(text) 59 | return sentences 60 | 61 | 62 | def build_stop_word_regex(stop_word_file_path): 63 | stop_word_list = load_stop_words(stop_word_file_path) 64 | stop_word_regex_list = [] 65 | for word in stop_word_list: 66 | word_regex = r'\b' + word + r'(?![\w-])' # added look ahead for hyphen 67 | stop_word_regex_list.append(word_regex) 68 | stop_word_pattern = re.compile('|'.join(stop_word_regex_list), re.IGNORECASE) 69 | return stop_word_pattern 70 | 71 | 72 | def generate_candidate_keywords(sentence_list, stopword_pattern): 73 | phrase_list = [] 74 | for s in sentence_list: 75 | tmp = re.sub(stopword_pattern, '|', s.strip()) 76 | phrases = tmp.split("|") 77 | for phrase in phrases: 78 | phrase = phrase.strip().lower() 79 | if phrase != "": 80 | phrase_list.append(phrase) 81 | return phrase_list 82 | 83 | 84 | def calculate_word_scores(phraseList): 85 | word_frequency = {} 86 | word_degree = {} 87 | for phrase in phraseList: 88 | word_list = separate_words(phrase, 0) 89 | word_list_length = len(word_list) 90 | word_list_degree = word_list_length - 1 91 | #if word_list_degree > 3: word_list_degree = 3 #exp. 92 | for word in word_list: 93 | word_frequency.setdefault(word, 0) 94 | word_frequency[word] += 1 95 | word_degree.setdefault(word, 0) 96 | word_degree[word] += word_list_degree #orig. 97 | #word_degree[word] += 1/(word_list_length*1.0) #exp. 98 | for item in word_frequency: 99 | word_degree[item] = word_degree[item] + word_frequency[item] 100 | 101 | # Calculate Word scores = deg(w)/frew(w) 102 | word_score = {} 103 | for item in word_frequency: 104 | word_score.setdefault(item, 0) 105 | word_score[item] = word_degree[item] / (word_frequency[item] * 1.0) #orig. 106 | #word_score[item] = word_frequency[item]/(word_degree[item] * 1.0) #exp. 107 | return word_score 108 | 109 | 110 | def generate_candidate_keyword_scores(phrase_list, word_score): 111 | keyword_candidates = {} 112 | for phrase in phrase_list: 113 | keyword_candidates.setdefault(phrase, 0) 114 | word_list = separate_words(phrase, 0) 115 | candidate_score = 0 116 | for word in word_list: 117 | candidate_score += word_score[word] 118 | keyword_candidates[phrase] = candidate_score 119 | return keyword_candidates 120 | 121 | 122 | class Rake(object): 123 | def __init__(self, stop_words_path): 124 | self.stop_words_path = stop_words_path 125 | self.__stop_words_pattern = build_stop_word_regex(stop_words_path) 126 | 127 | def run(self, text): 128 | sentence_list = split_sentences(text) 129 | 130 | phrase_list = generate_candidate_keywords(sentence_list, self.__stop_words_pattern) 131 | word_scores = calculate_word_scores(phrase_list) 132 | 133 | keyword_candidates = generate_candidate_keyword_scores(phrase_list, word_scores) 134 | 135 | sorted_keywords = sorted(keyword_candidates.items(), key=operator.itemgetter(1), reverse=True) 136 | return sorted_keywords 137 | 138 | 139 | if test: 140 | text = "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. 
These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types." 141 | 142 | # Split text into sentences 143 | sentenceList = split_sentences(text) 144 | #stoppath = "FoxStoplist.txt" #Fox stoplist contains "numbers", so it will not find "natural numbers" like in Table 1.1 145 | stoppath = "SmartStoplist.txt" #SMART stoplist misses some of the lower-scoring keywords in Figure 1.5, which means that the top 1/3 cuts off one of the 4.0 score words in Table 1.1 146 | stopwordpattern = build_stop_word_regex(stoppath) 147 | 148 | # generate candidate keywords 149 | phraseList = generate_candidate_keywords(sentenceList, stopwordpattern) 150 | 151 | # calculate individual word scores 152 | wordscores = calculate_word_scores(phraseList) 153 | 154 | # generate candidate keyword scores 155 | keywordcandidates = generate_candidate_keyword_scores(phraseList, wordscores) 156 | if debug: print(keywordcandidates) 157 | 158 | sortedKeywords = sorted(keywordcandidates.items(), key=operator.itemgetter(1), reverse=True) 159 | if debug: print(sortedKeywords) 160 | 161 | totalKeywords = len(sortedKeywords) 162 | if debug: print(totalKeywords) 163 | print(sortedKeywords[0:(totalKeywords / 3)]) 164 | 165 | rake = Rake("SmartStoplist.txt") 166 | keywords = rake.run(text) 167 | print(keywords) 168 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/JupyterNotebooks/sample_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Services_CustomSearchExpertSystems/JupyterNotebooks/sample_page.png -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/JupyterNotebooks/searchdesign.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Services_CustomSearchExpertSystems/JupyterNotebooks/searchdesign.png -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/JupyterNotebooks/toolsandprocess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Services_CustomSearchExpertSystems/JupyterNotebooks/toolsandprocess.png -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/Python/azsearch_mgmt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python code to upload data to Azure Search for the Custom Search example. 3 | 4 | This script will upload all of the session information where 5 | each individual sesssion equates to a document in an index 6 | in an Azure Search service. 7 | 8 | Go to http://portal.azure.com and sign up for a search service. 9 | Get the service name and service key and plug it in below. 10 | This is NOT production level code. Please do not use it as such. 11 | You might have to pip install the imported modules here. 
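For orientation, the helper functions below reduce to plain REST calls against the service endpoint.
A minimal sketch of the document-upload call (service name, admin key and index name are placeholders,
not working values; the field names match the index definition in getIndexDefinition below):

    import requests, json
    endpoint = 'https://<your-service-name>.search.windows.net'
    headers  = {'Content-type': 'application/json', 'api-key': '<your-admin-api-key>'}
    payload  = json.dumps({'value': [{'@search.action': 'upload', 'Index': '1',
                                      'SubsectionText': 'Some section text ...'}]})
    r = requests.post(endpoint + '/indexes/<your-index-name>/docs/index?api-version=2016-09-01',
                      headers=headers, data=payload)

The same pattern (wrapped by getServiceUrl/postMethod below) is used to create the index and to
upload documents in bulk, in chunks, or one at a time.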
12 | 13 | Run this script in the 'code' directory: 14 | python azsearch_mgmt.py 15 | 16 | See Azure Search REST API docs for more info: 17 | https://docs.microsoft.com/en-us/rest/api/searchservice/index 18 | 19 | """ 20 | 21 | import requests 22 | import json 23 | import csv 24 | import datetime 25 | import pytz 26 | import calendar 27 | import os 28 | import pyexcel as pe 29 | 30 | # This is the service you've already created in Azure Portal 31 | serviceName = 'your_azure_search_service_name' 32 | 33 | # Index to be created 34 | indexName = 'name_of_index_to_create' 35 | 36 | # Set your service API key, either via an environment variable or enter it below 37 | #apiKey = os.getenv('SEARCH_KEY_DEV', '') 38 | apiKey = 'your_azure_search_service_api_key' 39 | apiVersion = '2016-09-01' 40 | 41 | # Input parsed content Excel file, e.g., output of step #1 in 42 | # https://github.com/CatalystCode/CustomSearch/tree/master/JupyterNotebooks/1-content_extraction.ipynb 43 | inputfile = os.path.join(os.getcwd(), '../sample/parsed_content.xlsx') 44 | 45 | # Define fields mapping from Excel file column names to search index field names (except Index) 46 | # Change this mapping to match your content fields and rename output fields as desired 47 | # Search field names should match their definition in getIndexDefinition() 48 | fields_map = [ ('File' , 'File'), 49 | ('ChapterTitle' , 'ChapterTitle'), 50 | ('SectionTitle' , 'SectionTitle'), 51 | ('SubsectionTitle' , 'SubsectionTitle'), 52 | ('SubsectionText' , 'SubsectionText'), 53 | ('Keywords' , 'Keywords') ] 54 | 55 | # Fields: Index File ChapterTitle SectionTitle SubsectionTitle SubsectionText Keywords 56 | def getIndexDefinition(): 57 | return { 58 | "name": indexName, 59 | "fields": [ 60 | {"name": "Index", "type": "Edm.String", "key": True, "retrievable": True, "searchable": False, "filterable": False, "sortable": True, "facetable": False}, 61 | 62 | {"name": "File", "type": "Edm.String", "retrievable": True, "searchable": False, "filterable": True, "sortable": True, "facetable": False}, 63 | 64 | {"name": "ChapterTitle", "type": "Edm.String", "retrievable": True, "searchable": True, "filterable": True, "sortable": True, "facetable": True}, 65 | 66 | {"name": "SectionTitle", "type": "Edm.String", "retrievable": True, "searchable": True, "filterable": True, "sortable": False, "facetable": True}, 67 | 68 | {"name": "SubsectionTitle", "type": "Edm.String", "retrievable": True, "searchable": True, "filterable": True, "sortable": True, "facetable": False}, 69 | 70 | {"name": "SubsectionText", "type": "Edm.String", "retrievable": True, "searchable": True, "filterable": False, "sortable": False, "facetable": False, "analyzer": "en.microsoft"}, 71 | 72 | {"name": "Keywords", "type": "Edm.String", "retrievable": True, "searchable": True, "filterable": False, "sortable": False, "facetable": False, "analyzer": "en.microsoft"} 73 | ] 74 | } 75 | 76 | def getServiceUrl(): 77 | return 'https://' + serviceName + '.search.windows.net' 78 | 79 | def getMethod(servicePath): 80 | headers = {'Content-type': 'application/json', 'api-key': apiKey} 81 | r = requests.get(getServiceUrl() + servicePath, headers=headers) 82 | #print(r.text) 83 | return r 84 | 85 | def postMethod(servicePath, body): 86 | headers = {'Content-type': 'application/json', 'api-key': apiKey} 87 | r = requests.post(getServiceUrl() + servicePath, headers=headers, data=body) 88 | #print(r, r.text) 89 | return r 90 | 91 | def createIndex(): 92 | indexDefinition = json.dumps(getIndexDefinition()) 93 | 
servicePath = '/indexes/?api-version=%s' % apiVersion 94 | r = postMethod(servicePath, indexDefinition) 95 | #print r.text 96 | if r.status_code == 201: 97 | print('Index %s created' % indexName) 98 | else: 99 | print('Failed to create index %s' % indexName) 100 | exit(1) 101 | 102 | def deleteIndex(): 103 | servicePath = '/indexes/%s?api-version=%s&delete' % (indexName, apiVersion) 104 | headers = {'Content-type': 'application/json', 'api-key': apiKey} 105 | r = requests.delete(getServiceUrl() + servicePath, headers=headers) 106 | #print(r.text) 107 | 108 | def getIndex(): 109 | servicePath = '/indexes/%s?api-version=%s' % (indexName, apiVersion) 110 | r = getMethod(servicePath) 111 | if r.status_code == 200: 112 | return True 113 | else: 114 | return False 115 | 116 | def getDocumentObject(): 117 | valarry = [] 118 | cnt = 1 119 | records = pe.iget_records(file_name=inputfile) 120 | for row in records: 121 | outdict = {} 122 | outdict['@search.action'] = 'upload' 123 | 124 | if (row[fields_map[0][0]]): 125 | outdict['Index'] = str(row['Index']) 126 | for (in_fld, out_fld) in fields_map: 127 | outdict[out_fld] = row[in_fld] 128 | valarry.append(outdict) 129 | cnt+=1 130 | 131 | return {'value' : valarry} 132 | 133 | def getDocumentObjectByChunk(start, end): 134 | valarry = [] 135 | cnt = 1 136 | records = pe.iget_records(file_name=inputfile) 137 | for i, row in enumerate(records): 138 | if start <= i < end: 139 | outdict = {} 140 | outdict['@search.action'] = 'upload' 141 | 142 | if (row[fields_map[0][0]]): 143 | outdict['Index'] = str(row['Index']) 144 | for (in_fld, out_fld) in fields_map: 145 | outdict[out_fld] = row[in_fld] 146 | valarry.append(outdict) 147 | cnt+=1 148 | 149 | return {'value' : valarry} 150 | 151 | # Upload content for indexing in one request if content is not too large 152 | def uploadDocuments(): 153 | documents = json.dumps(getDocumentObject()) 154 | servicePath = '/indexes/' + indexName + '/docs/index?api-version=' + apiVersion 155 | r = postMethod(servicePath, documents) 156 | if r.status_code == 200: 157 | print('Success: %s' % r) 158 | else: 159 | print('Failure: %s' % r.text) 160 | exit(1) 161 | 162 | # Upload content for indexing in chunks if content is too large for one request 163 | def uploadDocumentsInChunks(chunksize): 164 | records = pe.iget_records(file_name=inputfile) 165 | cnt = 0 166 | for row in records: 167 | cnt += 1 168 | 169 | for chunk in range(cnt/chunksize + 1): 170 | print('Processing chunk number %d ...' 
% chunk) 171 | start = chunk * chunksize 172 | end = start + chunksize 173 | documents = json.dumps(getDocumentObjectByChunk(start, end)) 174 | servicePath = '/indexes/' + indexName + '/docs/index?api-version=' + apiVersion 175 | r = postMethod(servicePath, documents) 176 | if r.status_code == 200: 177 | print('Success: %s' % r) 178 | else: 179 | print('Failure: %s' % r.text) 180 | return 181 | 182 | # Upload content for indexing one document at a time 183 | def uploadDocumentsOneByOne(): 184 | records = pe.iget_records(file_name=inputfile) 185 | valarry = [] 186 | for i, row in enumerate(records): 187 | outdict = {} 188 | outdict['@search.action'] = 'upload' 189 | 190 | if (row[fields_map[0][0]]): 191 | outdict['Index'] = str(row['Index']) 192 | for (in_fld, out_fld) in fields_map: 193 | outdict[out_fld] = row[in_fld] 194 | valarry.append(outdict) 195 | 196 | documents = json.dumps({'value' : valarry}) 197 | servicePath = '/indexes/' + indexName + '/docs/index?api-version=' + apiVersion 198 | r = postMethod(servicePath, documents) 199 | if r.status_code == 200: 200 | print('%d Success: %s' % (i,r)) 201 | else: 202 | print('%d Failure: %s' % (i, r.text)) 203 | exit(1) 204 | 205 | def printDocumentCount(): 206 | servicePath = '/indexes/' + indexName + '/docs/$count?api-version=' + apiVersion 207 | getMethod(servicePath) 208 | 209 | def sampleQuery(query, ntop=3): 210 | servicePath = '/indexes/' + indexName + '/docs?api-version=%s&search=%s&$top=%d' % \ 211 | (apiVersion, query, ntop) 212 | getMethod(servicePath) 213 | 214 | if __name__ == '__main__': 215 | # Create index if it does not exist 216 | if not getIndex(): 217 | createIndex() 218 | else: 219 | ans = raw_input('Index %s already exists ... Do you want to delete it? [Y/n]' % indexName) 220 | if ans.lower() == 'y': 221 | deleteIndex() 222 | print('Re-creating index %s ...' % indexName) 223 | createIndex() 224 | else: 225 | print('Index %s is not deleted ... 
New content will be added to existing index' % indexName) 226 | 227 | #getIndex() 228 | #uploadDocuments() 229 | uploadDocumentsInChunks(50) 230 | #uploadDocumentsOneByOne() 231 | printDocumentCount() 232 | sampleQuery('child tax credit') -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/Python/azsearch_query.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python code to query Azure Search interactively 3 | 4 | Run this script in the 'code' directory: 5 | python azsearch_query.py 6 | 7 | See Azure Search REST API docs for more info: 8 | https://docs.microsoft.com/en-us/rest/api/searchservice/index 9 | 10 | """ 11 | 12 | import requests 13 | import json 14 | import os 15 | 16 | # This is the service you've already created in Azure Portal 17 | serviceName = 'your_azure_search_service_name' 18 | 19 | # This is the index you've already created in Azure Portal or via the azsearch_mgmt.py script 20 | indexName = 'your_index_name_to_use' 21 | 22 | # Set your service API key, either via an environment variable or enter it below 23 | #apiKey = os.getenv('SEARCH_KEY_DEV', '') 24 | apiKey = 'your_azure_search_service_api_key' 25 | apiVersion = '2016-09-01' 26 | 27 | # Retrieval options to alter the query results 28 | SEARCHFIELDS = None # use all searchable fields for retrieval 29 | #SEARCHFIELDS = 'Keywords, SubsectionText' # use selected fields only for retrieval 30 | FUZZY = False # enable fuzzy search (check API for details) 31 | NTOP = 5 # number of results to return 32 | 33 | 34 | def getServiceUrl(): 35 | return 'https://' + serviceName + '.search.windows.net' 36 | 37 | def getMethod(servicePath): 38 | headers = {'Content-type': 'application/json', 'api-key': apiKey} 39 | r = requests.get(getServiceUrl() + servicePath, headers=headers) 40 | #print(r, r.text) 41 | return r 42 | 43 | def postMethod(servicePath, body): 44 | headers = {'Content-type': 'application/json', 'api-key': apiKey} 45 | r = requests.post(getServiceUrl() + servicePath, headers=headers, data=body) 46 | #print(r, r.text) 47 | return r 48 | 49 | def submitQuery(query, fields=None, ntop=10): 50 | servicePath = '/indexes/' + indexName + '/docs?api-version=%s&search=%s&$top=%d' % \ 51 | (apiVersion, query, ntop) 52 | if fields != None: 53 | servicePath += '&searchFields=%s' % fields 54 | if FUZZY: 55 | servicePath += '&queryType=full' 56 | r = getMethod(servicePath) 57 | if r.status_code != 200: 58 | print('Failed to retrieve search results') 59 | print(r, r.text) 60 | return 61 | docs = json.loads(r.text)['value'] 62 | print('Number of search results = %d\n' % len(docs)) 63 | for i, doc in enumerate(docs): 64 | print('Results# %d' % (i+1)) 65 | print('Chapter title : %s' % doc['ChapterTitle'].encode('utf8')) 66 | print('Section title : %s' % doc['SectionTitle'].encode('utf8')) 67 | print('Subsection title: %s' % doc['SubsectionTitle'].encode('utf8')) 68 | print('%s\n' % doc['SubsectionText'].encode('utf8')) 69 | 70 | 71 | ##################################################################### 72 | # Azure Search interactive query - command-line interface 73 | # Retrieve Azure Search documents via an interactive query 74 | # Fields: Index File ChapterTitle SectionTitle SubsectionTitle SubsectionText Keywords 75 | ##################################################################### 76 | if __name__ == '__main__': 77 | while True: 78 | print 79 | print "Hit enter with no input to quit."
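        # An empty query exits the loop below; raw_input and the bare print statements are Python 2 syntax (use input()/print() on Python 3)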
80 | query = raw_input("Query: ") 81 | if query == '': 82 | exit(0) 83 | 84 | # Submit query to Azure Search and retrieve results 85 | #searchFields = None 86 | searchFields = SEARCHFIELDS 87 | submitQuery(query, fields=searchFields, ntop=NTOP) -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/Python/azsearch_queryall.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python code for batch retrieval of Azure Search results for multiple queries in a file 3 | 4 | Run this script in the 'code' directory: 5 | python azsearch_queryall.py 6 | 7 | See Azure Search REST API docs for more info: 8 | https://docs.microsoft.com/en-us/rest/api/searchservice/index 9 | 10 | """ 11 | 12 | import requests 13 | import json 14 | import csv 15 | import os 16 | import pyexcel as pe 17 | import codecs 18 | import pandas as pd 19 | 20 | # This is the service you've already created in Azure Portal 21 | serviceName = 'your_azure_search_service_name' 22 | 23 | # This is the index you've already created in Azure Portal or via the azsearch_mgmt.py script 24 | indexName = 'your_index_name_to_use' 25 | 26 | # Set your service API key, either via an environment variable or enter it below 27 | #apiKey = os.getenv('SEARCH_KEY_DEV', '') 28 | apiKey = 'your_azure_search_service_api_key' 29 | apiVersion = '2016-09-01' 30 | 31 | # Input file containing the list of queries [tab-separated .txt or .tsv, Excel .xls or .xlsx] 32 | infile = os.path.join(os.getcwd(), '../sample/sample_queries.txt') 33 | outfile = os.path.join(os.getcwd(), '../sample/sample_query_answers.xlsx') 34 | 35 | # Retrieval options to alter the query results 36 | SEARCHFIELDS = None # use all searchable fields for retrieval 37 | #SEARCHFIELDS = 'Keywords, SubsectionText' # use selected fields only for retrieval 38 | FUZZY = False # enable fuzzy search (check API for details) 39 | NTOP = 5 # number of results to return 40 | 41 | 42 | def getServiceUrl(): 43 | return 'https://' + serviceName + '.search.windows.net' 44 | 45 | def getMethod(servicePath): 46 | headers = {'Content-type': 'application/json', 'api-key': apiKey} 47 | r = requests.get(getServiceUrl() + servicePath, headers=headers) 48 | #print(r, r.text) 49 | return r 50 | 51 | def postMethod(servicePath, body): 52 | headers = {'Content-type': 'application/json', 'api-key': apiKey} 53 | r = requests.post(getServiceUrl() + servicePath, headers=headers, data=body) 54 | #print(r, r.text) 55 | return r 56 | 57 | def submitQuery(query, fields=None, ntop=10, fuzzy=False): 58 | servicePath = '/indexes/' + indexName + '/docs?api-version=%s&search=%s&$top=%d' % \ 59 | (apiVersion, query, ntop) 60 | if fields != None: 61 | servicePath += '&searchFields=%s' % fields 62 | if fuzzy: 63 | servicePath += '&queryType=full' 64 | 65 | r = getMethod(servicePath) 66 | if r.status_code != 200: 67 | print('Failed to retrieve search results') 68 | print(query, r, r.text) 69 | return {} 70 | docs = json.loads(r.text)['value'] 71 | return docs 72 | 73 | 74 | ############################################################################# 75 | # Retrieve Azure Search documents for all queries in batch 76 | # Fields: Index File ChapterTitle SectionTitle SubsectionTitle SubsectionText Keywords 77 | ############################################################################# 78 | if __name__ == '__main__': 79 | # Dataframe to collect the retrieved results for all queries 80 | df = pd.DataFrame(columns = ['Qid', 'Query', 'Rank',
'SubsectionText', 'ChapterTitle', 'SectionTitle', 'SubsectionTitle', 'Keywords'], dtype=unicode) 81 | 82 | if infile.endswith('.tsv') or infile.endswith('.txt'): 83 | records = pd.read_csv(infile, sep='\t', header=0, encoding='utf-8') 84 | rows = records.iterrows() 85 | elif infile.endswith('.xls') or infile.endswith('.xlsx'): 86 | records = pe.iget_records(file_name=infile) 87 | rows = enumerate(records) 88 | else: 89 | print('Unsupported query file extension. Options: tsv, txt, xls, xlsx') 90 | exit(1) 91 | 92 | for i, row in rows: 93 | qid = int(row['Qid']) 94 | query = row['Query'] 95 | # Submit query to Azure Search and retrieve results 96 | searchFields = SEARCHFIELDS 97 | docs = submitQuery(query, fields=searchFields, ntop=NTOP, fuzzy=FUZZY) 98 | print('QID: %4d\tNumber of results: %d' % (qid, len(docs))) 99 | for id, doc in enumerate(docs): 100 | chapter_title = doc['ChapterTitle'] 101 | section_title = doc['SectionTitle'] 102 | subsection_title = doc['SubsectionTitle'] 103 | subsection_text = doc['SubsectionText'] 104 | keywords = doc['Keywords'] 105 | 106 | df = df.append({'Qid' : qid, 107 | 'Query' : query, 108 | 'Rank' : (id + 1), 109 | 'SubsectionText' : subsection_text, 110 | 'ChapterTitle' : chapter_title, 111 | 'SectionTitle' : section_title, 112 | 'SubsectionTitle' : subsection_title, 113 | 'Keywords' : keywords}, 114 | ignore_index=True) 115 | 116 | # Save all answers 117 | df['Qid'] = df['Qid'].astype(int) 118 | df['Rank'] = df['Rank'].astype(int) 119 | 120 | if outfile.endswith('.xls') or outfile.endswith('.xlsx'): 121 | df.to_excel(outfile, index=False, encoding='utf-8') 122 | else: # default tab-separated file 123 | df.to_csv(outfile, sep='\t', index=False, encoding='utf-8') 124 | 125 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/Python/keyphrase_extract.py: -------------------------------------------------------------------------------- 1 | ########################################################################################### 2 | # Keyphrase extractor example for experimentation 3 | # Supports algorithms: RAKE, topic rank, single rank, TFIDF and KPMINER 4 | # For more info about the RAKE algorithm and implementation, see https://github.com/aneesha/RAKE 5 | # Note: A copy of rake.py and SmartStoplist.txt stopwords list is included with this script 6 | # For more info about the PKE implementations, see https://github.com/boudinfl/pke 7 | # Note: Install PKE from the GitHub repo https://github.com/boudinfl/pke 8 | ########################################################################################### 9 | 10 | # Import base packages 11 | from bs4 import BeautifulSoup 12 | import os, glob, sys, re, string  # string is needed by clean_text() when nopunct=True 13 | from rake import * 14 | import pke 15 | 16 | 17 | # Strip non-ascii characters that break the overlap check 18 | def strip_non_ascii(s): 19 | s = (c for c in s if 0 < ord(c) < 255) 20 | s = ''.join(s) 21 | return s 22 | 23 | # Clean text: remove newlines, compact spaces, strip non_ascii, etc.
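# (Lowercasing and punctuation stripping are optional flags on clean_text below.)
#
# Illustrative wiring of the helpers in this file, assuming a hypothetical input file 'mydoc.txt':
#   cleaned       = clean_text(open('mydoc.txt').read(), lowercase=True)
#   rake_phrases  = get_keyphrases_rake('mydoc.txt', stoplist_path='SmartStoplist.txt', min_score=1)
#   topic_phrases = get_keyphrases_pke('mydoc.txt', mode='topic', ntop=100)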
24 | def clean_text(text, lowercase=False, nopunct=False): 25 | # Convert to lowercase 26 | if lowercase: 27 | text = text.lower() 28 | 29 | # Remove punctuation 30 | if nopunct: 31 | puncts = string.punctuation 32 | for c in puncts: 33 | text = text.replace(c, ' ') 34 | 35 | # Strip non-ascii characters 36 | text = strip_non_ascii(text) 37 | 38 | # Remove newlines - Compact and strip whitespaces 39 | text = re.sub('[\r\n]+', ' ', text) 40 | text = re.sub('\s+', ' ', text) 41 | return text.strip() 42 | 43 | # Extract keyphrases using RAKE algorithm. Limit results by minimum score. 44 | def get_keyphrases_rake(infile, stoplist_path=None, min_score=0): 45 | if stoplist_path == None: 46 | stoplist_path = 'SmartStoplist.txt' 47 | 48 | rake = Rake(stoplist_path) 49 | text = open(infile, 'r').read() 50 | keywords = rake.run(text) 51 | phrases = [] 52 | for keyword in keywords: 53 | score = keyword[1] 54 | if score >= min_score: 55 | phrases.append(keyword) 56 | 57 | return phrases 58 | 59 | def get_keyphrases_pke(infile, mode='topic', stoplist_path=None, postags=None, ntop=100): 60 | if stoplist_path == None: 61 | stoplist_path = 'SmartStoplist.txt' 62 | stoplist = [open(stoplist_path, 'r').read()] 63 | 64 | if postags == None: 65 | postags = ['NN', 'NNS', 'NNP', 'NNPS', 'JJ', 'JJR', 'JJS', 'VBN', 'VBD'] 66 | 67 | # Run keyphrase extractor - Topic_Rank unsupervised method 68 | if mode == 'topic': 69 | try: 70 | extractor = pke.TopicRank(input_file=infile, language='english') 71 | extractor.read_document(format='raw', stemmer=None) 72 | extractor.candidate_selection(stoplist=stoplist, pos=postags) 73 | extractor.candidate_weighting(threshold=0.25, method='average') 74 | phrases = extractor.get_n_best(300, redundancy_removal=True) 75 | except: 76 | phrases = [] 77 | 78 | # Run keyphrase extractor - Single_Rank unsupervised method 79 | elif mode == 'single': 80 | try: 81 | extractor = pke.SingleRank(input_file=infile, language='english') 82 | extractor.read_document(format='raw', stemmer=None) 83 | extractor.candidate_selection(stoplist=stoplist) 84 | extractor.candidate_weighting(normalized=True) 85 | except: 86 | phrases = [] 87 | 88 | # Run keyphrase extractor - TfIdf unsupervised method 89 | elif mode == 'tfidf': 90 | try: 91 | extractor= pke.TfIdf(input_file=infile, language='english') 92 | extractor.read_document(format='raw', stemmer=None) 93 | extractor.candidate_selection(stoplist=stoplist) 94 | extractor.candidate_weighting() 95 | except: 96 | phrases = [] 97 | 98 | # Run keyphrase extractor - KP_Miner unsupervised method 99 | elif mode == 'kpminer': 100 | try: 101 | extractor = pke.KPMiner(input_file=infile, language='english') 102 | extractor.read_document(format='raw', stemmer=None) 103 | extractor.candidate_selection(stoplist=stoplist) 104 | extractor.candidate_weighting() 105 | except: 106 | phrases = [] 107 | 108 | else: # invalid mode 109 | print "Invalid keyphrase extraction algorithm: %s" % mode 110 | print "Valid PKE algorithms: [topic, single, kpminer, tfidf]" 111 | exit(1) 112 | 113 | phrases = extractor.get_n_best(ntop, redundancy_removal=True) 114 | return phrases 115 | 116 | def usage(): 117 | print('Usage %s filename [algo]' % os.path.basename(sys.argv[0])) 118 | print('Algo options: rake, topic, single, tfidf, kpminer') 119 | 120 | 121 | ############################## 122 | # Main processing 123 | ############################## 124 | 125 | if len(sys.argv) < 2: 126 | print('Missing content file name') 127 | usage() 128 | exit(1) 129 | 130 | infile = sys.argv[1] 131 | if 
len(sys.argv) >= 3: 132 | algo = sys.argv[2] 133 | if algo not in ['rake', 'topic', 'single', 'tfidf', 'kpminer']: 134 | print "Invalid keyphrase extraction algorithm: %s" % algo 135 | usage() 136 | exit(1) 137 | else: 138 | algo = 'rake' 139 | 140 | # Read custom stopwords list from file - Applies to all algos 141 | # If no stopwords file is supplied, default uses SmartStoplist.txt 142 | stoplist_file = 'SmartStoplist_extended.txt' 143 | 144 | # Select POS tags to use for PKE candidate selection, use default if None 145 | postags = ['NN', 'NNS', 'NNP', 'NNPS', 'JJ', 'JJR', 'JJS', 'VBN', 'VBD'] 146 | 147 | # Run keyphrase extraction 148 | if algo == 'rake': 149 | min_score = 1 150 | phrases = get_keyphrases_rake(infile, stoplist_path=stoplist_file, min_score=min_score) 151 | else: 152 | ntop = 200 153 | phrases = get_keyphrases_pke(infile, mode=algo, stoplist_path=stoplist_file, postags=postags, ntop=ntop) 154 | 155 | # Report all keyphrases and their scores 156 | print 'Number of extracted keyphrases = %d' % len(phrases) 157 | for phrase in phrases: 158 | print phrase 159 | 160 | # Combined list of keyphrases (no scores) 161 | all_phrases = ', '.join(p[0] for p in phrases) 162 | print('\nKeyphrases list: %s' % all_phrases) 163 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Custom Search 3 | 4 | > Sample custom search project using Azure Search and the US Tax Code. 5 | 6 | > Python scripts and Jupyter notebooks that allow you to quickly and iteratively customize, 7 | improve and measure your custom search experience. 8 | 9 | ## Description 10 | Querying specific content areas quickly and easily is a common services sector need. Fast traversal of specialized publications, customer support knowledge bases or document repositories allows service companies to deliver their particular service efficiently and effectively. Simple FAQs don’t cover enough ground, and a string search isn’t effective or efficient for those not familiar with the domain or the document set. Instead, these companies can deliver a custom search experience that saves their clients time and provides them better service through a question and answer format. In this project, we leveraged Azure Search and Cognitive Services and we share our custom code for iterative testing, measurement and indexer redeployment. In our solution, the customized search engine will form the foundation for delivering a question and answer experience in a specific domain area. 11 | 12 | ## End-to-End Example Provided in Jupyter Notebooks 13 | * Collect, pre-process, and augment content with keyphrases 14 | * Create an Azure Search index 15 | * Query the index and retrieve results interactively and/or in batch 16 | 17 | ## Getting Started 18 | 19 | 1. Read the [Real Life Code Story](https://www.microsoft.com/reallifecode/), "[Developing a Custom Search Engine for an Expert Chat System.](https://www.microsoft.com/reallifecode/)" 20 | 2. Review the [Azure Search service features](https://azure.microsoft.com/en-us/services/search/). 21 | 3. Get a [free trial subscriptions to Azure Search.](https://azure.microsoft.com/en-us/free/) 22 | 4. Copy your Azure Search name and Key. 23 | 5. Review the [sample](https://github.com/CatalystCode/CustomSearch/tree/master/sample) 24 | search index input and enriched input in the sample folder to understand content. 25 | 6. 
Try the sample Jupyter notebooks for an overview of the end-2-end process for content extraction, augmentation with keyphrases, indexing and retrieval. 26 | * Step 1: Content and keyphrase extraction: [1-content_extraction.ipynb](https://github.com/CatalystCode/CustomSearch/blob/master/JupyterNotebooks/1-content_extraction.ipynb) 27 | * Step 2: Index creation: [2-content_indexing.ipynb](https://github.com/CatalystCode/CustomSearch/blob/master/JupyterNotebooks/2-content_indexing.ipynb) 28 | * Step 3: Interactive and batch search queries: [3-azure_search_query.ipynb](https://github.com/CatalystCode/CustomSearch/blob/master/JupyterNotebooks/3-azure_search_query.ipynb) 29 | 7. A command-line version of the scripts is available under the Python folder. 30 | * Run the [azsearch_mgmt.py script](https://github.com/CatalystCode/CustomSearch/blob/master/Python/azsearch_mgmt.py), using your Azure Search name, key and index name of your choice to create a search index. 31 | * Run the [azsearch_query.py script](https://github.com/CatalystCode/CustomSearch/blob/master/Python/azsearch_query.py) to interactively query your new search index and see results. 32 | * Run the [azsearch_queryall.py script](https://github.com/CatalystCode/CustomSearch/blob/master/Python/azsearch_queryall.py) to batch query your new search index and evaluate the results. 33 | * Run the [keyphrase_extract.py script](https://github.com/CatalystCode/CustomSearch/blob/master/Python/keyphrase_extract.py) to experiment with various keyphrase extraction algorithms to enrich the search index metadata. Note this script is Python 2.7 only. 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/sample/html/1.1.1.1.1.3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 26 U.S. Code § 3 - Tax tables for individuals | US Law | LII / Legal Information Institute 14 |

26 U.S. Code § 3 - Tax tables for individuals

15 | 18 |
19 |
20 | § 3. 21 |
22 |
23 | Tax tables for individuals 24 |
25 |
26 | 27 | 28 | 29 | (a) 30 | 31 | 32 | Imposition of tax table tax 33 | 34 |
35 | 36 | 37 | 38 | (1) 39 | 40 | 41 | In general 42 | 43 | 44 | In lieu of the tax imposed by section 1, there is hereby imposed for each taxable year on the taxable income of every individual— 45 | 46 |
47 | 48 | 49 | 50 | (A) 51 | 52 |
53 | who does not itemize his deductions for the taxable year, and 54 |
55 |
56 |
57 | 58 | 59 | 60 | (B) 61 | 62 |
63 | whose taxable income for such taxable year does not exceed the ceiling amount, 64 |
65 |
66 |
67 | a tax determined under tables, applicable to such taxable year, which shall be prescribed by the Secretary and which shall be in such form as he determines appropriate. In the table so prescribed, the amounts of the tax shall be computed on the basis of the rates prescribed by section 1. 68 |
69 |
70 |
71 | 72 | 73 | 74 | (2) 75 | 76 | 77 | Ceiling amount defined 78 | 79 |
80 |

81 | For purposes of paragraph (1), the term “ceiling amount” means, with respect to any taxpayer, the amount (not less than $20,000) determined by the Secretary for the tax rate category in which such taxpayer falls. 82 |

83 |
84 |
85 |
86 | 87 | 88 | 89 | (3) 90 | 91 | 92 | Authority to prescribe tables for taxpayers who itemize deductions 93 | 94 |
95 |

96 | The Secretary may provide that this section shall apply also for any taxable year to individuals who itemize their deductions. Any tables prescribed under the preceding sentence shall be on the basis of taxable income. 97 |

98 |
99 |
100 |
101 |
102 | 103 | 104 | 105 | (b) 106 | 107 | 108 | Section inapplicable to certain individuals 109 | 110 | 111 | This section shall not apply to— 112 | 113 |
114 | 115 | 116 | 117 | (1) 118 | 119 |
120 | an individual making a return under section 443(a)(1) for a period of less than 12 months on account of a change in annual accounting period, and 121 |
122 |
123 |
124 | 125 | 126 | 127 | (2) 128 | 129 |
130 | an estate or trust. 131 |
132 |
133 |
134 |
135 | 136 | 137 | 138 | (c) 139 | 140 | 141 | Tax treated as imposed by section 1 142 | 143 |
144 |

145 | For purposes of this title, the tax imposed by this section shall be treated as tax imposed by section 1. 146 |

147 |
148 |
149 |
150 | 151 | 152 | 153 | (d) 154 | 155 | 156 | Taxable income 157 | 158 |
159 |

160 | Whenever it is necessary to determine the taxable income of an individual to whom this section applies, the taxable income shall be determined under section 63. 161 |

162 |
163 |
164 |
165 | 166 | 167 | 168 | (e) 169 | 170 | 171 | Cross reference 172 | 173 |
174 |

175 | For computation of tax by Secretary, see section 6014. 176 |

177 |
178 |
179 |
180 | (Aug. 16, 1954, ch. 736, 181 | 182 | 68A Stat. 8 183 | 184 | ; 185 | 186 | Pub. L. 88–272, title III 187 | 188 | , § 301(a), 189 | 190 | Feb. 26, 1964 191 | 192 | , 193 | 194 | 78 Stat. 129 195 | 196 | ; 197 | 198 | Pub. L. 91–172, title VIII 199 | 200 | , § 803(c), 201 | 202 | Dec. 30, 1969 203 | 204 | , 205 | 206 | 83 Stat. 684 207 | 208 | ; 209 | 210 | Pub. L. 94–12, title II 211 | 212 | , § 201(c), 213 | 214 | Mar. 29, 1975 215 | 216 | , 217 | 218 | 89 Stat. 29 219 | 220 | ; 221 | 222 | Pub. L. 94–455, title V 223 | 224 | , § 501(a), 225 | 226 | Oct. 4, 1976 227 | 228 | , 229 | 230 | 90 Stat. 1558 231 | 232 | ; 233 | 234 | Pub. L. 95–30, title I 235 | 236 | , § 101(b), 237 | 238 | May 23, 1977 239 | 240 | , 241 | 242 | 91 Stat. 131 243 | 244 | ; 245 | 246 | Pub. L. 95–600, title IV 247 | 248 | , § 401(b)(1), 249 | 250 | Nov. 6, 1978 251 | 252 | , 253 | 254 | 92 Stat. 2867 255 | 256 | ; 257 | 258 | Pub. L. 95–600, title II 259 | 260 | , § 202(g), as added 261 | 262 | Pub. L. 96–222, title I 263 | 264 | , § 108(a)(1)(A), 265 | 266 | Apr. 1, 1980 267 | 268 | , 269 | 270 | 94 Stat. 223 271 | 272 | ; 273 | 274 | Pub. L. 96–222, title I 275 | 276 | , § 108(a)(1)(E), 277 | 278 | Apr. 1, 1980 279 | 280 | , 281 | 282 | 94 Stat. 225 283 | 284 | ; 285 | 286 | Pub. L. 97–34, title I 287 | 288 | , §§ 101(b)(2)(B), (C), (c)(2)(A), 121(c)(3), 289 | 290 | Aug. 13, 1981 291 | 292 | , 293 | 294 | 95 Stat. 183 295 | 296 | , 197; 297 | 298 | Pub. L. 99–514, title I 299 | 300 | , §§ 102(b), 141(b)(1), 301 | 302 | Oct. 22, 1986 303 | 304 | , 305 | 306 | 100 Stat. 2102 307 | 308 | , 2117.) 309 |
310 |
311 | 312 | 313 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/sample/html/1.1.1.1.2.1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 26 U.S. Code § 11 - Tax imposed | US Law | LII / Legal Information Institute 14 |

26 U.S. Code § 11 - Tax imposed

15 | 18 |
19 |
20 | § 11. 21 |
22 |
23 | Tax imposed 24 |
25 |
26 | 27 | 28 | 29 | (a) 30 | 31 | 32 | Corporations in general 33 | 34 |
35 |

36 | A tax is hereby imposed for each taxable year on the taxable income of every corporation. 37 |

38 |
39 |
40 |
41 | 42 | 43 | 44 | (b) 45 | 46 | 47 | Amount of tax 48 | 49 |
50 | 51 | 52 | 53 | (1) 54 | 55 | 56 | In general 57 | 58 | 59 | The amount of the tax imposed by subsection (a) shall be the sum of— 60 | 61 |
62 | 63 | 64 | 65 | (A) 66 | 67 |
68 | 15 percent of so much of the taxable income as does not exceed $50,000, 69 |
70 |
71 |
72 | 73 | 74 | 75 | (B) 76 | 77 |
78 | 25 percent of so much of the taxable income as exceeds $50,000 but does not exceed $75,000, 79 |
80 |
81 |
82 | 83 | 84 | 85 | (C) 86 | 87 |
88 | 34 percent of so much of the taxable income as exceeds $75,000 but does not exceed $10,000,000, and 89 |
90 |
91 |
92 | 93 | 94 | 95 | (D) 96 | 97 |
98 | 35 percent of so much of the taxable income as exceeds $10,000,000. 99 |
100 |
101 |
102 | In the case of a corporation which has taxable income in excess of $100,000 for any taxable year, the amount of tax determined under the preceding sentence for such taxable year shall be increased by the lesser of (i) 5 percent of such excess, or (ii) $11,750. In the case of a corporation which has taxable income in excess of $15,000,000, the amount of the tax determined under the foregoing provisions of this paragraph shall be increased by an additional amount equal to the lesser of (i) 3 percent of such excess, or (ii) $100,000. 103 |
104 |
105 |
106 | 107 | 108 | 109 | (2) 110 | 111 | 112 | Certain personal service corporations not eligible for graduated rates 113 | 114 |
115 |

116 | Notwithstanding paragraph (1), the amount of the tax imposed by subsection (a) on the taxable income of a qualified personal service corporation (as defined in section 448(d)(2)) shall be equal to 35 percent of the taxable income. 117 |

118 |
119 |
120 |
121 |
122 | 123 | 124 | 125 | (c) 126 | 127 | 128 | Exceptions 129 | 130 | 131 | Subsection (a) shall not apply to a corporation subject to a tax imposed by— 132 | 133 |
134 | 135 | 136 | 137 | (1) 138 | 139 |
140 | section 594 (relating to mutual savings banks conducting life insurance business), 141 |
142 |
143 |
144 | 145 | 146 | 147 | (2) 148 | 149 |
150 | subchapter L (sec. 801 and following, relating to insurance companies), or 151 |
152 |
153 |
154 | 155 | 156 | 157 | (3) 158 | 159 |
160 | subchapter M (sec. 851 and following, relating to regulated investment companies and real estate investment trusts). 161 |
162 |
163 |
164 |
165 | 166 | 167 | 168 | (d) 169 | 170 | 171 | Foreign corporations 172 | 173 |
174 |

175 | In the case of a foreign corporation, the taxes imposed by subsection (a) and section 55 shall apply only as provided by section 882. 176 |

177 |
178 |
179 |
180 | (Aug. 16, 1954, ch. 736, 181 | 182 | 68A Stat. 11 183 | 184 | ; Mar. 30, 1955, ch. 18, § 2, 185 | 186 | 69 Stat. 14 187 | 188 | ; Mar. 29, 1956, ch. 115, § 2, 189 | 190 | 70 Stat. 66 191 | 192 | ; 193 | 194 | Pub. L. 85–12 195 | 196 | , § 2, 197 | 198 | Mar. 29, 1957 199 | 200 | , 201 | 202 | 71 Stat. 9 203 | 204 | ; 205 | 206 | Pub. L. 85–475 207 | 208 | , § 2, 209 | 210 | June 30, 1958 211 | 212 | , 213 | 214 | 72 Stat. 259 215 | 216 | ; 217 | 218 | Pub. L. 86–75 219 | 220 | , § 2, 221 | 222 | June 30, 1959 223 | 224 | , 225 | 226 | 73 Stat. 157 227 | 228 | ; 229 | 230 | Pub. L. 86–564, title II 231 | 232 | , § 201, 233 | 234 | June 30, 1960 235 | 236 | , 237 | 238 | 74 Stat. 290 239 | 240 | ; 241 | 242 | Pub. L. 86–779 243 | 244 | , § 10(d), 245 | 246 | Sept. 14, 1960 247 | 248 | , 249 | 250 | 74 Stat. 1009 251 | 252 | ; 253 | 254 | Pub. L. 87–72 255 | 256 | , § 2, 257 | 258 | June 30, 1961 259 | 260 | , 261 | 262 | 75 Stat. 193 263 | 264 | ; 265 | 266 | Pub. L. 87–508 267 | 268 | , § 2, 269 | 270 | June 28, 1962 271 | 272 | , 273 | 274 | 76 Stat. 114 275 | 276 | ; 277 | 278 | Pub. L. 88–52 279 | 280 | , § 2, 281 | 282 | June 29, 1963 283 | 284 | , 285 | 286 | 77 Stat. 72 287 | 288 | ; 289 | 290 | Pub. L. 88–272, title I 291 | 292 | , § 121, 293 | 294 | Feb. 26, 1964 295 | 296 | , 297 | 298 | 78 Stat. 25 299 | 300 | ; 301 | 302 | Pub. L. 89–809, title I 303 | 304 | , § 104(b)(2), 305 | 306 | Nov. 13, 1966 307 | 308 | , 309 | 310 | 80 Stat. 1557 311 | 312 | ; 313 | 314 | Pub. L. 91–172, title IV 315 | 316 | , § 401(b)(2)(B), 317 | 318 | Dec. 30, 1969 319 | 320 | , 321 | 322 | 83 Stat. 602 323 | 324 | ; 325 | 326 | Pub. L. 94–12, title III 327 | 328 | , § 303(a), (b), 329 | 330 | Mar. 29, 1975 331 | 332 | , 333 | 334 | 89 Stat. 44 335 | 336 | ; 337 | 338 | Pub. L. 94–164 339 | 340 | , § 4(a)–(c), 341 | 342 | Dec. 23, 1975 343 | 344 | , 345 | 346 | 89 Stat. 973 347 | 348 | , 974; 349 | 350 | Pub. L. 94–455, title IX 351 | 352 | , § 901(a), 353 | 354 | Oct. 4, 1976 355 | 356 | , 357 | 358 | 90 Stat. 1606 359 | 360 | ; 361 | 362 | Pub. L. 95–30, title II 363 | 364 | , § 201(1), (2), 365 | 366 | May 23, 1977 367 | 368 | , 369 | 370 | 91 Stat. 141 371 | 372 | ; 373 | 374 | Pub. L. 95–600, title III 375 | 376 | , § 301(a), 377 | 378 | Nov. 6, 1978 379 | 380 | , 381 | 382 | 92 Stat. 2820 383 | 384 | ; 385 | 386 | Pub. L. 97–34, title II 387 | 388 | , § 231(a), 389 | 390 | Aug. 13, 1981 391 | 392 | , 393 | 394 | 95 Stat. 249 395 | 396 | ; 397 | 398 | Pub. L. 98–369, div. A, title I 399 | 400 | , § 66(a), 401 | 402 | July 18, 1984 403 | 404 | , 405 | 406 | 98 Stat. 585 407 | 408 | ; 409 | 410 | Pub. L. 99–514, title VI 411 | 412 | , § 601(a), 413 | 414 | Oct. 22, 1986 415 | 416 | , 417 | 418 | 100 Stat. 2249 419 | 420 | ; 421 | 422 | Pub. L. 100–203, title X 423 | 424 | , § 10224(a), 425 | 426 | Dec. 22, 1987 427 | 428 | , 429 | 430 | 101 Stat. 1330–412 431 | 432 | ; 433 | 434 | Pub. L. 100–647, title I 435 | 436 | , § 1007(g)(13)(B), 437 | 438 | Nov. 10, 1988 439 | 440 | , 441 | 442 | 102 Stat. 3436 443 | 444 | ; 445 | 446 | Pub. L. 103–66, title XIII 447 | 448 | , § 13221(a), (b), 449 | 450 | Aug. 10, 1993 451 | 452 | , 453 | 454 | 107 Stat. 477 455 | 456 | .) 457 |
458 |
459 | 460 | 461 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/sample/html/1.1.1.11.2.1.2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 26 U.S. Code § 722 - Basis of contributing partner’s interest | US Law | LII / Legal Information Institute 14 |

26 U.S. Code § 722 - Basis of contributing partner’s interest

15 | 18 |
19 |
20 | § 722. 21 |
22 |
23 | Basis of contributing partner’s interest 24 |
25 |
26 |

27 | The basis of an interest in a partnership acquired by a contribution of property, including money, to the partnership shall be the amount of such money and the adjusted basis of such property to the contributing partner at the time of the contribution increased by the amount (if any) of gain recognized under section 721(b) to the contributing partner at such time. 28 |

29 |
30 |
31 | (Aug. 16, 1954, ch. 736, 32 | 33 | 68A Stat. 245 34 | 35 | ; 36 | 37 | Pub. L. 94–455, title XXI 38 | 39 | , § 2131(c), 40 | 41 | Oct. 4, 1976 42 | 43 | , 44 | 45 | 90 Stat. 1924 46 | 47 | ; 48 | 49 | Pub. L. 98–369, div. A, title VII 50 | 51 | , § 722(f)(1), 52 | 53 | July 18, 1984 54 | 55 | , 56 | 57 | 98 Stat. 974 58 | 59 | .) 60 |
61 |
62 | 63 | 64 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/sample/html/styles/css_XgGKW_fNRFCK5BruHWlbChY4U8WE0xT4CWGilKSjSXA.css: -------------------------------------------------------------------------------- 1 | .ctools-locked{color:red;border:1px solid red;padding:1em;}.ctools-owns-lock{background:#FFFFDD none repeat scroll 0 0;border:1px solid #F0C020;padding:1em;}a.ctools-ajaxing,input.ctools-ajaxing,button.ctools-ajaxing,select.ctools-ajaxing{padding-right:18px !important;background:url(/sites/all/modules/ctools/images/status-active.gif) right center no-repeat;}div.ctools-ajaxing{float:left;width:18px;background:url(/sites/all/modules/ctools/images/status-active.gif) center center no-repeat;} 2 | .quicktabs-hide{display:none;}ul.quicktabs-tabs{margin-top:0;}ul.quicktabs-tabs li{display:inline;background:none;list-style-type:none;padding:2px;white-space:nowrap;}ul.quicktabs-tabs li a:focus{outline:none;} 3 | .quicktabs_main.quicktabs-style-zen{clear:both;}ul.quicktabs-tabs.quicktabs-style-zen{margin:0 0 10px 0;padding:0 0 3px;font-size:1em;list-style:none;height:21px;background:transparent url(/sites/all/modules/quicktabs/quicktabs_tabstyles/tabstyles/zen/images/tab-bar.png) repeat-x left bottom;}*html ul.quicktabs-tabs.quicktabs-style-zen li{margin-bottom:-5px;}ul.quicktabs-tabs.quicktabs-style-zen li{float:left;margin:0 5px;padding:0 0 0 5px;background:transparent url(/sites/all/modules/quicktabs/quicktabs_tabstyles/tabstyles/zen/images/tab-left-ie6.png) no-repeat left -38px;}ul.quicktabs-tabs.quicktabs-style-zen li a{font:bold 12px/170% Verdana;font-size-adjust:none;display:block;margin:0;padding:4px 17px 0px 12px;border-width:0;font-weight:bold;text-decoration:none;background:transparent url(/sites/all/modules/quicktabs/quicktabs_tabstyles/tabstyles/zen/images/tab-right-ie6.png) no-repeat right -38px;}ul.quicktabs-tabs.quicktabs-style-zen li:hover a{border-width:0;background:transparent url(/sites/all/modules/quicktabs/quicktabs_tabstyles/tabstyles/zen/images/tab-right-ie6.png) no-repeat right -76px;}quicktabs-tabs.quicktabs-style-zen li:hover{background:transparent url(/sites/all/modules/quicktabs/quicktabs_tabstyles/tabstyles/zen/images/tab-left-ie6.png) no-repeat left -76px;}ul.quicktabs-tabs.quicktabs-style-zen li.active a,ul.quicktabs-tabs.quicktabs-style-zen li.active a:hover{text-decoration:none;border-width:0;background:transparent url(/sites/all/modules/quicktabs/quicktabs_tabstyles/tabstyles/zen/images/tab-right-ie6.png) no-repeat right 0;}ul.quicktabs-tabs.quicktabs-style-zen li.active{background:transparent url(/sites/all/modules/quicktabs/quicktabs_tabstyles/tabstyles/zen/images/tab-left-ie6.png) no-repeat left 0;} 4 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/sample/html/styles/css_dolo-SIAwemLdrlTs99Lrug9kFXMYlMG3OlznBv4Kho.css: -------------------------------------------------------------------------------- 1 | .footnotes{clear:both;margin-top:4em;margin-bottom:2em;border-top:1px solid #000000;}.footnotes{font-size:0.9em;}.see-footnote{vertical-align:top;position:relative;top:-0.25em;font-size:0.9em;}ul.footnotes{list-style-type:none;margin-left:0;padding-left:0;}ul.footnotes li{margin-left:2.5em;list-style-type:none;background:none;}ul.footnotes{position:relative;}.footnotes .footnote-label{position:absolute;left:0px;z-index:2;}.see-footnote:target,.footnotes 
.footnote:target{background-color:#eeeeee;}.see-footnote:target{border:solid 1px #aaaaaa;}.footnotes .footnote-multi{vertical-align:top;position:relative;top:-0.25em;font-size:0.75em;}#fn1{border-top:1px solid #000000;margin-top:3em;}.footnote{font-size:0.9em;} 2 | .book-navigation .menu{border-top:1px solid #888;padding:1em 0 0 3em;}.book-navigation .page-links{border-top:1px solid #888;border-bottom:1px solid #888;text-align:center;padding:0.5em;}.book-navigation .page-previous{text-align:left;width:42%;display:block;float:left;}.book-navigation .page-up{margin:0 5%;width:4%;display:block;float:left;}.book-navigation .page-next{text-align:right;width:42%;display:block;float:right;}#book-outline{min-width:56em;}.book-outline-form .form-item{margin-top:0;margin-bottom:0;}html.js #edit-book-pick-book{display:none;}.form-item-book-bid .description{clear:both;}#book-admin-edit select{margin-right:24px;}#book-admin-edit select.progress-disabled{margin-right:0;}#book-admin-edit tr.ajax-new-content{background-color:#ffd;}#book-admin-edit .form-item{float:left;} 3 | #comments{margin-top:15px;}.indented{margin-left:25px;}.comment-unpublished{background-color:#fff4f4;}.comment-preview{background-color:#ffffea;} 4 | .container-inline-date{clear:both;}.container-inline-date .form-item{float:none;margin:0;padding:0;}.container-inline-date > .form-item{display:inline-block;margin-right:0.5em;margin-bottom:10px;vertical-align:top;}.container-inline-date .form-item .form-item{float:left;}.container-inline-date .form-item,.container-inline-date .form-item input{width:auto;}.container-inline-date .description{clear:both;}.container-inline-date .form-item input,.container-inline-date .form-item select,.container-inline-date .form-item option{margin-right:5px;}.container-inline-date .date-spacer{margin-left:-5px;}.views-right-60 .container-inline-date div{margin:0;padding:0;}.container-inline-date .date-timezone .form-item{clear:both;float:none;width:auto;}.container-inline-date .date-padding{padding:10px;float:left;}.views-exposed-form .container-inline-date .date-padding{padding:0;}#calendar_div,#calendar_div td,#calendar_div th{margin:0;padding:0;}#calendar_div,.calendar_control,.calendar_links,.calendar_header,.calendar{border-collapse:separate;margin:0;width:185px;}.calendar td{padding:0;}span.date-display-single{}span.date-display-start{}span.date-display-end{}.date-prefix-inline{display:inline-block;}.date-clear{clear:both;display:block;float:none;}.date-no-float{clear:both;float:none;width:98%;}.date-float{clear:none;float:left;width:auto;}.date-float .form-type-checkbox{padding-right:1em;}.form-type-date-select .form-type-select[class$=hour]{margin-left:.75em;}.date-container .date-format-delete{float:left;margin-top:1.8em;margin-left:1.5em;}.date-container .date-format-name{float:left;}.date-container .date-format-type{float:left;padding-left:10px;}.date-container .select-container{clear:left;float:left;}div.date-calendar-day{background:#F3F3F3;border-top:1px solid #EEE;border-left:1px solid #EEE;border-right:1px solid #BBB;border-bottom:1px solid #BBB;color:#999;float:left;line-height:1;margin:6px 10px 0 0;text-align:center;width:40px;}div.date-calendar-day span{display:block;text-align:center;}div.date-calendar-day span.month{background-color:#B5BEBE;color:white;font-size:.9em;padding:2px;text-transform:uppercase;}div.date-calendar-day span.day{font-size:2em;font-weight:bold;}div.date-calendar-day 
span.year{font-size:.9em;padding:2px;}.form-item.form-item-instance-widget-settings-input-format-custom,.form-item.form-item-field-settings-enddate-required{margin-left:1.3em;}#edit-field-settings-granularity .form-type-checkbox{margin-right:.6em;}.date-year-range-select{margin-right:1em;} 5 | .field .field-label{font-weight:bold;}.field-label-inline .field-label,.field-label-inline .field-items{float:left;}form .field-multiple-table{margin:0;}form .field-multiple-table th.field-label{padding-left:0;}form .field-multiple-table td.field-multiple-drag{width:30px;padding-right:0;}form .field-multiple-table td.field-multiple-drag a.tabledrag-handle{padding-right:.5em;}form .field-add-more-submit{margin:.5em 0 0;} 6 | .node-unpublished{background-color:#fff4f4;}.preview .node{background-color:#ffffea;}td.revision-current{background:#ffc;} 7 | .search-form{margin-bottom:1em;}.search-form input{margin-top:0;margin-bottom:0;}.search-results{list-style:none;}.search-results p{margin-top:0;}.search-results .title{font-size:1.2em;}.search-results li{margin-bottom:1em;}.search-results .search-snippet-info{padding-left:1em;}.search-results .search-info{font-size:0.85em;}.search-advanced .criterion{float:left;margin-right:2em;}.search-advanced .action{float:left;clear:left;} 8 | #permissions td.module{font-weight:bold;}#permissions td.permission{padding-left:1.5em;}#permissions tr.odd .form-item,#permissions tr.even .form-item{white-space:normal;}#user-admin-settings fieldset .fieldset-description{font-size:0.85em;padding-bottom:.5em;}#user-admin-roles td.edit-name{clear:both;}#user-admin-roles .form-item-name{float:left;margin-right:1em;}.password-strength{width:17em;float:right;margin-top:1.4em;}.password-strength-title{display:inline;}.password-strength-text{float:right;font-weight:bold;}.password-indicator{background-color:#C4C4C4;height:0.3em;width:100%;}.password-indicator div{height:100%;width:0%;background-color:#47C965;}input.password-confirm,input.password-field{width:16em;margin-bottom:0.4em;}div.password-confirm{float:right;margin-top:1.5em;visibility:hidden;width:17em;}div.form-item div.password-suggestions{padding:0.2em 0.5em;margin:0.7em 0;width:38.5em;border:1px solid #B4B4B4;}div.password-suggestions ul{margin-bottom:0;}.confirm-parent,.password-parent{clear:left;margin:0;width:36.3em;}.profile{clear:both;margin:1em 0;}.profile .user-picture{float:right;margin:0 1em 1em 0;}.profile h3{border-bottom:1px solid #ccc;}.profile dl{margin:0 0 1.5em 0;}.profile dt{margin:0 0 0.2em 0;font-weight:bold;}.profile dd{margin:0 0 1em 0;} 9 | .views-exposed-form .views-exposed-widget{float:left;padding:.5em 1em 0 0;}.views-exposed-form .views-exposed-widget .form-submit{margin-top:1.6em;}.views-exposed-form .form-item,.views-exposed-form .form-submit{margin-top:0;margin-bottom:0;}.views-exposed-form label{font-weight:bold;}.views-exposed-widgets{margin-bottom:.5em;}.views-align-left{text-align:left;}.views-align-right{text-align:right;}.views-align-center{text-align:center;}.views-view-grid tbody{border-top:none;}.view .progress-disabled{float:none;} 10 | -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/sample/html/styles/css_kShW4RPmRstZ3SpIC-ZvVGNFVAi0WEMuCnI0ZkYIaFw.css: -------------------------------------------------------------------------------- 1 | #autocomplete{border:1px solid;overflow:hidden;position:absolute;z-index:100;}#autocomplete ul{list-style:none;list-style-image:none;margin:0;padding:0;}#autocomplete 
--------------------------------------------------------------------------------
/Services_CustomSearchExpertSystems/sample/html/styles/css_rJ3pqftttKVzxtjsOG18hAid4RqqjfFMw3d1C89lWd4.css:
--------------------------------------------------------------------------------
/Services_CustomSearchExpertSystems/sample/html/styles/css_tuqeOBz1ozigHOvScJR2wasCmXBizZ9rfd58u6_20EE.css:
--------------------------------------------------------------------------------
/Services_CustomSearchExpertSystems/sample/parsed_content.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Services_CustomSearchExpertSystems/sample/parsed_content.xlsx
--------------------------------------------------------------------------------
/Services_CustomSearchExpertSystems/sample/parsed_content_cornell_full.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Services_CustomSearchExpertSystems/sample/parsed_content_cornell_full.xlsx
--------------------------------------------------------------------------------
/Services_CustomSearchExpertSystems/sample/parsed_content_sample.xlsx:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Services_CustomSearchExpertSystems/sample/parsed_content_sample.xlsx -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/sample/raw_text_enriched_with_keywords_sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Services_CustomSearchExpertSystems/sample/raw_text_enriched_with_keywords_sample.xlsx -------------------------------------------------------------------------------- /Services_CustomSearchExpertSystems/sample/sample_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Services_CustomSearchExpertSystems/sample/sample_page.png -------------------------------------------------------------------------------- /Sports_IoTSensorSkillClassification/AUC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Sports_IoTSensorSkillClassification/AUC.png -------------------------------------------------------------------------------- /Sports_IoTSensorSkillClassification/rawdata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Sports_IoTSensorSkillClassification/rawdata.png -------------------------------------------------------------------------------- /Sports_IoTSensorSkillClassification/sensorpositions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Sports_IoTSensorSkillClassification/sensorpositions.png -------------------------------------------------------------------------------- /Sports_IoTSensorSkillClassification/timeseriesraw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Sports_IoTSensorSkillClassification/timeseriesraw.png -------------------------------------------------------------------------------- /Sports_IoTSensorSkillClassification/trunktwistdef.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Sports_IoTSensorSkillClassification/trunktwistdef.png -------------------------------------------------------------------------------- /Sports_IoTSensorSkillClassification/trunktwistmax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/JupyterCon2017/41486c7a00c9de76a020295c2c5668d41f52a21d/Sports_IoTSensorSkillClassification/trunktwistmax.png --------------------------------------------------------------------------------