├── LICENSE ├── README.md ├── hbase-hive-pig ├── README.md ├── create_hbase_table.sql └── load_hbase.pig ├── hbase-tables ├── README.md ├── create_schemas.py ├── data │ ├── gethue.com.html │ ├── gethue.pdf │ └── hue-logo.png ├── load_binary.py ├── load_data.log └── load_data.sh ├── hcatalog ├── README.md ├── avg_salary.hql └── avg_salary.pig ├── hive-udf ├── README.md ├── myudfs.jar ├── org │ └── hue │ │ └── udf │ │ └── MyUpper.java └── pom.xml ├── hive-workflow ├── README.md ├── create_table.hql ├── insert_table.hql ├── select_table.hql └── select_top_n.sql ├── hue-ha ├── README.md └── howto.txt ├── hue-saml ├── shibboleth-conf │ ├── attribute-filter.xml │ ├── attribute-resolver.xml │ ├── handler.xml │ ├── internal.xml │ ├── logging.xml │ ├── login.config │ ├── relying-party.xml │ └── service.xml └── tomcat6-conf │ ├── server.xml │ └── web.xml ├── impala ├── README.md ├── avro_converter.pig ├── create_avro_table.sql ├── create_parquet_table.sql ├── sample_tables │ ├── alltypes.sql │ ├── alltypes.zip │ └── table_100_cols │ │ ├── 100_cols.txt │ │ └── create.sql └── select_table.hql ├── notebook └── shared_rdd │ ├── README.md │ ├── hue-sharedrdd-notebook.json │ ├── shareable_rdd.py │ ├── shared_rdd.ipynb │ └── shared_rdd.py ├── oozie ├── credentials │ ├── hive-config.xml │ ├── hive.sql │ └── workflow.xml ├── el-functions │ └── rkanter │ │ └── MyELFunctions.java ├── hiveserver2-action │ ├── hive-site3.xml │ ├── select_genericl.sql │ └── workflow.xml └── workflow_demo │ ├── bundle.xml │ ├── coordinator.xml │ ├── job.properties │ ├── lib │ └── piggybank.jar │ ├── script.pig │ └── workflow.xml ├── pig-json-python-udf ├── README.md ├── clean_json.pig ├── converter.py └── python_udf.pig ├── search └── indexing │ ├── README.md │ └── apache_logs.py ├── solr-local-search ├── README.md ├── data_subset.sql ├── load_index.sh ├── solr_local │ ├── conf │ │ ├── admin-extra.html │ │ ├── admin-extra.menu-bottom.html │ │ ├── admin-extra.menu-top.html │ │ ├── currency.xml │ │ ├── elevate.xml │ │ ├── lang │ │ │ ├── contractions_ca.txt │ │ │ ├── contractions_fr.txt │ │ │ ├── contractions_ga.txt │ │ │ ├── contractions_it.txt │ │ │ ├── hyphenations_ga.txt │ │ │ ├── stemdict_nl.txt │ │ │ ├── stoptags_ja.txt │ │ │ ├── stopwords_ar.txt │ │ │ ├── stopwords_bg.txt │ │ │ ├── stopwords_ca.txt │ │ │ ├── stopwords_cz.txt │ │ │ ├── stopwords_da.txt │ │ │ ├── stopwords_de.txt │ │ │ ├── stopwords_el.txt │ │ │ ├── stopwords_en.txt │ │ │ ├── stopwords_es.txt │ │ │ ├── stopwords_eu.txt │ │ │ ├── stopwords_fa.txt │ │ │ ├── stopwords_fi.txt │ │ │ ├── stopwords_fr.txt │ │ │ ├── stopwords_ga.txt │ │ │ ├── stopwords_gl.txt │ │ │ ├── stopwords_hi.txt │ │ │ ├── stopwords_hu.txt │ │ │ ├── stopwords_hy.txt │ │ │ ├── stopwords_id.txt │ │ │ ├── stopwords_it.txt │ │ │ ├── stopwords_ja.txt │ │ │ ├── stopwords_lv.txt │ │ │ ├── stopwords_nl.txt │ │ │ ├── stopwords_no.txt │ │ │ ├── stopwords_pt.txt │ │ │ ├── stopwords_ro.txt │ │ │ ├── stopwords_ru.txt │ │ │ ├── stopwords_sv.txt │ │ │ ├── stopwords_th.txt │ │ │ ├── stopwords_tr.txt │ │ │ └── userdict_ja.txt │ │ ├── mapping-FoldToASCII.txt │ │ ├── mapping-ISOLatin1Accent.txt │ │ ├── protwords.txt │ │ ├── schema.xml │ │ ├── scripts.conf │ │ ├── solrconfig.xml │ │ ├── spellings.txt │ │ ├── stopwords.txt │ │ ├── synonyms.txt │ │ ├── update-script.js │ │ ├── velocity │ │ │ ├── VM_global_library.vm │ │ │ ├── browse.vm │ │ │ ├── cluster.vm │ │ │ ├── clusterResults.vm │ │ │ ├── debug.vm │ │ │ ├── did_you_mean.vm │ │ │ ├── facet_fields.vm │ │ │ ├── facet_pivot.vm │ │ │ ├── facet_queries.vm │ │ │ ├── 
facet_ranges.vm │ │ │ ├── facets.vm │ │ │ ├── footer.vm │ │ │ ├── head.vm │ │ │ ├── header.vm │ │ │ ├── hit.vm │ │ │ ├── hitGrouped.vm │ │ │ ├── join-doc.vm │ │ │ ├── jquery.autocomplete.css │ │ │ ├── jquery.autocomplete.js │ │ │ ├── layout.vm │ │ │ ├── main.css │ │ │ ├── product-doc.vm │ │ │ ├── query.vm │ │ │ ├── queryGroup.vm │ │ │ ├── querySpatial.vm │ │ │ ├── richtext-doc.vm │ │ │ ├── suggest.vm │ │ │ └── tabs.vm │ │ └── xslt │ │ │ ├── example.xsl │ │ │ ├── example_atom.xsl │ │ │ ├── example_rss.xsl │ │ │ ├── luke.xsl │ │ │ └── updateXml.xsl │ └── reviews.conf └── yelp_40.csv ├── spark └── bikeshare │ ├── 201408_weather_data.csv │ ├── README.md │ ├── index_data.csv │ ├── notebook.txt │ └── weather-data.spark.hue.json ├── sqoop2 ├── README.md ├── create_table.sql └── stats.pig └── static └── hue-3.5.png /README.md: -------------------------------------------------------------------------------- 1 | Hadoop Tutorials and Examples 2 | ============================= 3 | 4 | Source, data and tutorials of the Hue video series, the [Web UI for Apache Hadoop](http://gethue.com). 5 | Follow [@gethue](https://twitter.com/gethue) 6 | 7 | List of all [tutorials](http://gethue.com/category/tutorial/). 8 | 9 | Search 10 | - [Hadoop search: Dynamic search dashboards with Solr](http://gethue.com/hadoop-search-dynamic-search-dashboards-with-solr/) 11 | - [Analyse Apache logs and build your own Web Analytics dashboard with Hadoop and Solr](http://gethue.com/analyse-apache-logs-and-build-your-own-web-analytics-dashboard-with-hadoop-and-solr/) 12 | 13 | Spark 14 | - [Get started with Spark: deploy Spark Server and compute Pi from your Web Browser](http://gethue.com/get-started-with-spark-deploy-spark-server-and-compute-pi-from-your-web-browser/) 15 | 16 | Hive, HBase, Pig 17 | - [Get started with Hadoop: Collect and Analyse Twitter data](http://gethue.com/how-to-analyze-twitter-data-with-hue) | [Hive, Flume, HDFS, Oozie](https://github.com/romainr/cdh-twitter-example) 18 | - [How to access Hive in Pig with HCatalog in Hue](http://gethue.com/hadoop-tutorial-how-to-access-hive-in-pig-with) | [HCatalog](hcatalog) 19 | - [High Availability of Hue](http://gethue.com/hadoop-tutorial-high-availability-of-hue) | [Hue HA](hue-ha) 20 | - [How to create example tables in HBase](http://gethue.com/hadoop-tutorial-how-to-create-example-tables-in-hbase) | [HBase tables](hbase-tables) 21 | - [Build and use a Hive UDF in 1 minute](http://gethue.com/hadoop-tutorial-hive-udf-in-1-minute) | [Hive UDF](hive-udf) 22 | - [The Web UI for HBase: HBase Browser](http://gethue.com/the-web-ui-for-hbase-hbase-browser) 23 | 24 | Season 2 25 | - [1. Prepare the data for analysis with Pig and Python UDF](http://gethue.com/hadoop-tutorials-ii-1-prepare-the-data-for-analysis) | [Pig Python UDF and Json](pig-json-python-udf) 26 | - [2. Execute Hive queries and schedule them with Oozie](http://gethue.com/video-series-ii-2-execute-hive-queries-and-schedule) 27 | - [3. Schedule Hive queries with Oozie coordinators](http://gethue.com/hadoop-tutorials-ii-3-schedule-hive-queries-with) 28 | - [4. Fast SQL with the Impala Query Editor](http://gethue.com/fast-sql-with-the-impala-query-editor) 29 | - [5. Bundle Oozie coordinators with Hue](http://gethue.com/hadoop-tutorial-bundle-oozie-coordinators-with-hue) 30 | - [6. 
Use Pig and Hive with HBase](http://gethue.com/hadoop-tutorial-use-pig-and-hive-with-hbase)

![image](static/hue-3.5.png?raw=true)

-------------------------------------------------------------------------------- /hbase-hive-pig/README.md: --------------------------------------------------------------------------------
Blog URL
========

TBD
-------------------------------------------------------------------------------- /hbase-hive-pig/create_hbase_table.sql: --------------------------------------------------------------------------------

-- Create table in Hive/HBase

SET hbase.zookeeper.quorum=localhost;

CREATE TABLE top_cool_hbase (key string, value map<string, int>)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,review:")
TBLPROPERTIES ("hbase.table.name" = "top_cool");


-- Insert data

ADD JAR /usr/lib/hive/lib/zookeeper.jar;
ADD JAR /usr/lib/hive/lib/hbase.jar;
ADD JAR /usr/lib/hive/lib/hive-hbase-handler-0.10.0-cdh4.3.0.jar;
ADD JAR /usr/lib/hive/lib/guava-11.0.2.jar;

INSERT OVERWRITE TABLE top_cool_hbase SELECT name, map(`date`, cast(coolness as int)) FROM top_cool;


-- Insert cooler data: review stars keyed by restaurant name

INSERT OVERWRITE TABLE top_cool_hbase SELECT name, map(`date`, cast(r.stars as int)) FROM review r JOIN business b ON r.business_id = b.business_id;

-------------------------------------------------------------------------------- /hbase-hive-pig/load_hbase.pig: --------------------------------------------------------------------------------
REGISTER /usr/lib/zookeeper/zookeeper-3.4.5-cdh4.3.0.jar
REGISTER /usr/lib/hbase/hbase-0.94.6-cdh4.3.0-security.jar

set hbase.zookeeper.quorum 'localhost'

data = LOAD 'hbase://top_cool'
       USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('review:*', '-loadKey true')
       AS (name:CHARARRAY, dates:MAP[]);

counts = FOREACH data GENERATE name, dates#'2012-12-02';

DUMP counts;

-------------------------------------------------------------------------------- /hbase-tables/README.md: --------------------------------------------------------------------------------
Blog URL
========

[How to create example tables in HBase](http://gethue.com/hadoop-tutorial-how-to-create-example-tables-in-hbase/) | [HBase tables](hbase-tables)

-------------------------------------------------------------------------------- /hbase-tables/create_schemas.py: --------------------------------------------------------------------------------
#!/usr/bin/env python
#
# Generates columns and cell data for an analytics table of 1000+ columns
# cf. 
url 5 | # 6 | 7 | # create 'analytics', 'hour', 'day', 'total' 8 | 9 | 10 | import itertools 11 | import random 12 | 13 | random.seed(1) 14 | 15 | ROWS = 1000 16 | HOURS = range(0, 25) 17 | DAYS = range(0, 366) 18 | COUNTRIES = ['US', 'France', 'Italy'] 19 | FAMILLIES = ['hour', 'day', 'total'] 20 | 21 | 22 | # Utilities 23 | 24 | def columns_hours(): 25 | FAMILLY = 'hour' 26 | cols = [] 27 | for hour in HOURS: 28 | cols.append('%s:%02d-%s' % (FAMILLY, hour, 'total')) 29 | for country in COUNTRIES: 30 | cols.append('%s:%02d-%s' % (FAMILLY, hour, country)) 31 | return cols 32 | 33 | def columns_days(): 34 | FAMILLY = 'day' 35 | cols = [] 36 | for day in DAYS: 37 | cols.append('%s:%03d-%s' % (FAMILLY, day, 'total')) 38 | for country in COUNTRIES: 39 | cols.append('%s:%03d-%s' % (FAMILLY, day, country)) 40 | return cols 41 | 42 | def columns_total(): 43 | FAMILLY = 'total' 44 | return ['%s:%s' % (FAMILLY, col) for col in ['total'] + COUNTRIES] 45 | 46 | def get_domain(n): 47 | return ['domain.%s' % n] 48 | 49 | def total(): 50 | return [count_by_country(10000)] 51 | 52 | def days(): 53 | return [count_by_country(1000) for day in DAYS] 54 | 55 | def hours(): 56 | return [count_by_country(100) for hour in HOURS] 57 | 58 | def count_by_country(n): 59 | counts = [random.randrange(1, n) for country in COUNTRIES] 60 | return [sum(counts)] + counts 61 | 62 | def print_columns(): 63 | all_cols = columns_hours() + columns_days() + columns_total() 64 | print "-Dimporttsv.columns=HBASE_ROW_KEY," + ','.join(['%s' % col for col in all_cols]) 65 | 66 | def generate_data(data_file): 67 | f = open(data_file, 'w') 68 | 69 | for i in xrange(ROWS): 70 | a = hours() + days() + total() 71 | f.write('\t'.join(get_domain(i) + map(str, itertools.chain.from_iterable(a))) + '\n') 72 | 73 | print data_file + ' genererated' 74 | 75 | 76 | # Main 77 | 78 | # 79 | # Print columns and generate data into a file 80 | # 81 | print_columns() 82 | generate_data('/tmp/hbase-analytics.tsv') 83 | -------------------------------------------------------------------------------- /hbase-tables/data/gethue.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/romainr/hadoop-tutorials-examples/a070880c7d44402997080d860bb49cc234dff879/hbase-tables/data/gethue.pdf -------------------------------------------------------------------------------- /hbase-tables/data/hue-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/romainr/hadoop-tutorials-examples/a070880c7d44402997080d860bb49cc234dff879/hbase-tables/data/hue-logo.png -------------------------------------------------------------------------------- /hbase-tables/load_binary.py: -------------------------------------------------------------------------------- 1 | ## 2 | ## Insert various data into HBase 3 | ## 4 | 5 | ## cd $HUE_HOME (e.g. 
cd /usr/share/hue(/opt/cloudera/parcels/CDH-XXXXX/share/hue if using parcels)) 6 | ## build/env/bin/hue shell 7 | ## 8 | 9 | from hbase.api import HbaseApi 10 | 11 | HbaseApi().putRow('Cluster', 'events', 'hue-20130801', {'doc:txt': 'Hue is awesome!'}) 12 | HbaseApi().putRow('Cluster', 'events', 'hue-20130801', {'doc:json': '{"user": "hue", "coolness": "extra"}'}) 13 | HbaseApi().putRow('Cluster', 'events', 'hue-20130802', {'doc:version': 'I like HBase'}) 14 | HbaseApi().putRow('Cluster', 'events', 'hue-20130802', {'doc:version': 'I LOVE HBase'}) 15 | 16 | 17 | ## From https://github.com/romainr/hadoop-tutorials-examples 18 | ## cd /tmp 19 | ## git clone https://github.com/romainr/hadoop-tutorials-examples.git 20 | 21 | root='/tmp/hadoop-tutorials-examples' 22 | 23 | HbaseApi().putRow('Cluster', 'events', 'hue-20130801', {'doc:img': open(root + '/hbase-tables/data/hue-logo.png', "rb").read()}) 24 | HbaseApi().putRow('Cluster', 'events', 'hue-20130801', {'doc:html': open(root + '/hbase-tables/data/gethue.com.html', "rb").read()}) 25 | HbaseApi().putRow('Cluster', 'events', 'hue-20130801', {'doc:pdf': open(root + '/hbase-tables/data/gethue.pdf', "rb").read()}) 26 | 27 | -------------------------------------------------------------------------------- /hcatalog/README.md: -------------------------------------------------------------------------------- 1 | Blog URL 2 | ======== 3 | 4 | [How to access Hive in Pig with HCatalog in Hue](http://gethue.tumblr.com/post/56804308712/hadoop-tutorial-how-to-access-hive-in-pig-with) 5 | 6 | -------------------------------------------------------------------------------- /hcatalog/avg_salary.hql: -------------------------------------------------------------------------------- 1 | SELECT AVG(salary) FROM sample_07; 2 | 3 | -------------------------------------------------------------------------------- /hcatalog/avg_salary.pig: -------------------------------------------------------------------------------- 1 | -- Load table 'sample_07' 2 | sample_07 = LOAD 'sample_07' USING org.apache.hcatalog.pig.HCatLoader(); 3 | 4 | -- Compute the average salary of the table 5 | salaries = GROUP sample_07 ALL; 6 | out = FOREACH salaries GENERATE AVG(sample_07.salary); 7 | DUMP out; 8 | 9 | -------------------------------------------------------------------------------- /hive-udf/README.md: -------------------------------------------------------------------------------- 1 | Blog URL 2 | ======== 3 | 4 | [Build and use a Hive UDF in 1 minute](http://gethue.com/hadoop-tutorial-hive-udf-in-1-minute/) 5 | 6 | 7 | How to 8 | ====== 9 | 10 | - Just use the precompiled [myudfs.jar](myudfs.jar) 11 | 12 | - Or compile it with: 13 | ``` 14 | javac -cp $(ls /usr/lib/hive/lib/hive-exec*.jar):/usr/lib/hadoop/hadoop-common.jar org/hue/udf/MyUpper.java 15 | jar -cf myudfs.jar -C . . 
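# Once built, a hedged sketch of how the UDF is typically registered and called from Hive
# (the function name, the JAR path and the sample_07 table/column are placeholders, not
# taken from this repo):
#   hive> ADD JAR /path/to/myudfs.jar;
#   hive> CREATE TEMPORARY FUNCTION myUpper AS 'org.hue.udf.MyUpper';
#   hive> SELECT myUpper(description) FROM sample_07 LIMIT 10;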
16 | ``` 17 | -------------------------------------------------------------------------------- /hive-udf/myudfs.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/romainr/hadoop-tutorials-examples/a070880c7d44402997080d860bb49cc234dff879/hive-udf/myudfs.jar -------------------------------------------------------------------------------- /hive-udf/org/hue/udf/MyUpper.java: -------------------------------------------------------------------------------- 1 | 2 | package org.hue.udf; 3 | 4 | import org.apache.hadoop.hive.ql.exec.UDF; 5 | import org.apache.hadoop.io.Text; 6 | 7 | 8 | public final class MyUpper extends UDF { 9 | public Text evaluate(final Text s) { 10 | if (s == null) { return null; } 11 | return new Text(s.toString().toUpperCase()); 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /hive-udf/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 20 | 21 | 28 | 29 | 4.0.0 30 | 31 | org.hue.udf 32 | myudfs 33 | 1.0-SNAPSHOT 34 | jar 35 | 36 | myudfs 37 | http://gethue.com 38 | 39 | 40 | 1.1.0-cdh5.5.0-SNAPSHOT 41 | 2.6.0-cdh5.5.0-SNAPSHOT 42 | 43 | 44 | 45 | 46 | 47 | 48 | org.apache.maven.plugins 49 | maven-compiler-plugin 50 | 2.3.2 51 | 52 | 1.6 53 | 1.6 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | org.apache.hive 63 | hive-exec 64 | ${hive.version} 65 | 66 | 67 | org.apache.hadoop 68 | hadoop-common 69 | ${hadoop.version} 70 | 71 | 72 | 73 | 74 | 75 | cloudera 76 | https://repository.cloudera.com/content/repositories/snapshots 77 | 78 | true 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /hive-workflow/README.md: -------------------------------------------------------------------------------- 1 | Blog URL 2 | ======== 3 | 4 | TBD 5 | -------------------------------------------------------------------------------- /hive-workflow/create_table.hql: -------------------------------------------------------------------------------- 1 | CREATE TABLE top_cool AS 2 | SELECT r.business_id, name, SUM(cool) AS coolness, '$date' as `date` 3 | FROM review r JOIN business b 4 | ON (r.business_id = b.business_id) 5 | WHERE categories LIKE '%Restaurants%' 6 | AND `date` = '$date' 7 | GROUP BY r.business_id, name 8 | ORDER BY coolness DESC 9 | LIMIT 10 10 | 11 | -------------------------------------------------------------------------------- /hive-workflow/insert_table.hql: -------------------------------------------------------------------------------- 1 | INSERT INTO TABLE top_cool 2 | SELECT r.business_id, name, SUM(cool) AS coolness, '${date}' as `date` 3 | FROM review r JOIN business b 4 | ON (r.business_id = b.business_id) 5 | WHERE categories LIKE '%Restaurants%' 6 | AND `date` = '${date}' 7 | GROUP BY r.business_id, name 8 | ORDER BY coolness DESC 9 | LIMIT 10 10 | 11 | -------------------------------------------------------------------------------- /hive-workflow/select_table.hql: -------------------------------------------------------------------------------- 1 | SELECT r.business_id, name, SUM(cool) AS coolness, '$date' as `date` 2 | FROM review r JOIN business b 3 | ON (r.business_id = b.business_id) 4 | WHERE categories LIKE '%Restaurants%' 5 | AND `date` = '$date' 6 | GROUP BY r.business_id, name 7 | ORDER BY coolness DESC 8 | LIMIT 10 9 | -------------------------------------------------------------------------------- /hive-workflow/select_top_n.sql: 
--------------------------------------------------------------------------------
CREATE TABLE top_cool AS
SELECT r.business_id, name, SUM(cool) AS coolness, '${date}' as `date`
FROM review r JOIN business b
ON (r.business_id = b.business_id)
WHERE categories LIKE '%Restaurants%'
AND `date` = '${date}'
GROUP BY r.business_id, name
ORDER BY coolness DESC
LIMIT ${n}
-------------------------------------------------------------------------------- /hue-ha/README.md: --------------------------------------------------------------------------------
Blog URL
========

[High Availability of Hue](http://gethue.com/automatic-high-availability-with-hue-and-cloudera-manager/) | [Hue HA](hue-ha)

-------------------------------------------------------------------------------- /hue-ha/howto.txt: --------------------------------------------------------------------------------
1. Install rsyslogd, edit its config file (/etc/rsyslog.conf), add the following lines at the very bottom, then restart rsyslogd:

   $ModLoad imudp
   $UDPServerAddress 127.0.0.1
   $UDPServerRun 514

2. Add the following to /etc/rsyslog.d/haproxy.conf:

   local6.* /var/log/haproxy.log

3. Create the file /tmp/haproxy.conf and add the following lines to it:

   global
       daemon
       nbproc 1
       maxconn 100000
       log 127.0.0.1 local6 debug

   defaults
       option http-server-close
       mode http
       timeout http-request 5s
       timeout connect 5s
       timeout server 10s
       timeout client 10s

   listen Hue 0.0.0.0:80
       log global
       mode http
       stats enable
       balance source
       server hue1 hue-ha-test1-1.ent.cloudera.com:8888 cookie ServerA check inter 2000 fall 3
       server hue2 hue-ha-test1-2.ent.cloudera.com:8888 cookie ServerB check inter 2000 fall 3

4. Download and gunzip HAProxy from here: http://haproxy.1wt.eu/download/1.4/bin/haproxy-1.4.24-pcre-40kses-linux-i586.stripped.gz.

5. Run ./haproxy-1.4.24-pcre-40kses-linux-i586.stripped -f /tmp/haproxy.conf

6. Go to localhost:7000 and see that Hue is running. Show /var/log/haproxy.log.

7. Go to http://hue-ha-test1-1.ent.cloudera.com:7180/ (admin:admin) and stop the Hue instance that you're on (you will probably have to stop each instance to figure out which one).

8. Go to Hue (it should appear down once). Refresh and show that Hue is back up. Show /var/log/haproxy.log.
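A quick way to check the failover from a shell (a hedged sketch; adjust the host and port to whatever the "listen Hue" line of your haproxy.conf binds):

   curl -I http://localhost:80/      # should return an HTTP response from whichever Hue backend is up
   tail -f /var/log/haproxy.log      # shows which backend served the request and any health-check state changes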
55 | 56 | -------------------------------------------------------------------------------- /hue-saml/shibboleth-conf/attribute-filter.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 39 | 62 | 63 | 66 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /hue-saml/shibboleth-conf/logging.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 21 | 22 | 26 | 27 | 30 | 31 | /opt/shibboleth-idp/logs/idp-access.log 32 | 33 | 34 | /opt/shibboleth-idp/logs/idp-access-%d{yyyy-MM-dd}.log 35 | 36 | 37 | 38 | UTF-8 39 | %msg%n 40 | 41 | 42 | 43 | 44 | /opt/shibboleth-idp/logs/idp-audit.log 45 | 46 | 47 | /opt/shibboleth-idp/logs/idp-audit-%d{yyyy-MM-dd}.log 48 | 49 | 50 | 51 | UTF-8 52 | %msg%n 53 | 54 | 55 | 56 | 57 | /opt/shibboleth-idp/logs/idp-process.log 58 | 59 | 60 | /opt/shibboleth-idp/logs/idp-process-%d{yyyy-MM-dd}.log 61 | 62 | 63 | 64 | UTF-8 65 | %date{HH:mm:ss.SSS} - %level [%logger:%line] - %msg%n 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /hue-saml/shibboleth-conf/login.config: -------------------------------------------------------------------------------- 1 | /* 2 | This is the JAAS configuration file used by the Shibboleth IdP. 3 | 4 | A JAAS configuration file is a grouping of LoginModules defined in the following manner: 5 | ; 6 | 7 | LoginModuleClass - fully qualified class name of the LoginModule class 8 | Flag - indicates whether the requirement level for the modules; 9 | allowed values: required, requisite, sufficient, optional 10 | ModuleOptions - a space delimited list of name="value" options 11 | 12 | For complete documentation on the format of this file see: 13 | http://java.sun.com/j2se/1.5.0/docs/api/javax/security/auth/login/Configuration.html 14 | 15 | For LoginModules available within the Sun JVM see: 16 | http://java.sun.com/j2se/1.5.0/docs/guide/security/jaas/tutorials/LoginConfigFile.html 17 | 18 | Warning: Do NOT use Sun's JNDI LoginModule to authentication against an LDAP directory, 19 | Use the LdapLoginModule that ships with Shibboleth and is demonstrated below. 
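    In the LdapLoginModule example below: ldapUrl points at the directory server,
    baseDn is the subtree that is searched for accounts, ssl toggles LDAPS, and
    userFilter is the search filter used to match the login name ({0} is replaced
    by the username that was typed).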
20 | 21 | Note, the application identifier MUST be ShibUserPassAuth 22 | */ 23 | 24 | 25 | ShibUserPassAuth { 26 | 27 | // Example LDAP authentication 28 | // See: https://wiki.shibboleth.net/confluence/display/SHIB2/IdPAuthUserPass 29 | 30 | edu.vt.middleware.ldap.jaas.LdapLoginModule required 31 | ldapUrl="ldap://localhost:1389" 32 | baseDn="ou=People,dc=example,dc=com" 33 | ssl="false" 34 | userFilter="uid={0}"; 35 | 36 | 37 | // Example Kerberos authentication, requires Sun's JVM 38 | // See: https://wiki.shibboleth.net/confluence/display/SHIB2/IdPAuthUserPass 39 | /* 40 | com.sun.security.auth.module.Krb5LoginModule required 41 | useKeyTab="true" 42 | keyTab="/path/to/idp/keytab/file"; 43 | */ 44 | 45 | }; 46 | -------------------------------------------------------------------------------- /hue-saml/shibboleth-conf/service.xml: -------------------------------------------------------------------------------- 1 | 2 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 25 | 26 | 29 | 30 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 43 | 48 | -------------------------------------------------------------------------------- /impala/README.md: -------------------------------------------------------------------------------- 1 | Blog URL 2 | ======== 3 | 4 | TBD 5 | -------------------------------------------------------------------------------- /impala/avro_converter.pig: -------------------------------------------------------------------------------- 1 | REGISTER piggybank.jar 2 | 3 | 4 | data = load '/user/hive/warehouse/review/yelp_academic_dataset_review_clean.json' 5 | AS (funny:INT, useful:INT, cool:INT, user_id:CHARARRAY, review_id:CHARARRAY, 6 | stars:INT, text:CHARARRAY, business_id:CHARARRAY, date:CHARARRAY, type:CHARARRAY); 7 | 8 | data_clean = 9 | FILTER data 10 | BY funny IS NOT NULL 11 | AND useful IS NOT NULL 12 | AND cool IS NOT NULL 13 | AND user_id IS NOT NULL 14 | AND review_id IS NOT NULL 15 | AND business_id IS NOT NULL 16 | AND stars IS NOT NULL 17 | AND date IS NOT NULL 18 | AND type IS NOT NULL; 19 | 20 | STORE data_clean INTO 'impala/reviews_avro' 21 | USING org.apache.pig.piggybank.storage.avro.AvroStorage( 22 | '{ 23 | "schema": { 24 | "name": "review", 25 | "type": "record", 26 | "fields": [ 27 | {"name":"funny", "type":"int"}, 28 | {"name":"useful", "type":"int"}, 29 | {"name":"cool", "type":"int"}, 30 | {"name":"user_id", "type":"string"} 31 | {"name":"review_id", "type":"string"}, 32 | {"name":"stars", "type":"int"}, 33 | {"name":"text", "type":"string"}, 34 | {"name":"business_id", "type":"string"}, 35 | {"name":"date", "type":"string"}, 36 | {"name":"type", "type":"string"}, 37 | ]} 38 | }'); 39 | 40 | -------------------------------------------------------------------------------- /impala/create_avro_table.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE review_avro 2 | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' 3 | STORED AS 4 | inputformat 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' 5 | outputformat 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' 6 | LOCATION '/user/romain/impala/reviews_avro' 7 | tblproperties ('avro.schema.literal'='{ 8 | "name": "review", 9 | "type": "record", 10 | "fields": [ 11 | {"name":"business_id", "type":"string"}, 12 | {"name":"cool", "type":"int"}, 13 | {"name":"date", "type":"string"}, 14 | {"name":"funny", "type":"int"}, 15 | {"name":"review_id", "type":"string"}, 16 | {"name":"stars", "type":"int"}, 17 | {"name":"text", 
"type":"string"}, 18 | {"name":"type", "type":"string"}, 19 | {"name":"useful", "type":"int"}, 20 | {"name":"user_id", "type":"string"}]}'); 21 | 22 | -------------------------------------------------------------------------------- /impala/create_parquet_table.sql: -------------------------------------------------------------------------------- 1 | 2 | CREATE TABLE review_parquet LIKE review STORED AS PARQUETFILE; 3 | 4 | INSERT OVERWRITE review_parquet SELECT * FROM review; 5 | 6 | -------------------------------------------------------------------------------- /impala/sample_tables/alltypes.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS functional; 2 | DROP TABLE IF EXISTS functional.alltypes; 3 | CREATE EXTERNAL TABLE IF NOT EXISTS functional.alltypes ( 4 | id int COMMENT 'Add a comment', 5 | bool_col boolean, 6 | tinyint_col tinyint, 7 | smallint_col smallint, 8 | int_col int, 9 | bigint_col bigint, 10 | float_col float, 11 | double_col double, 12 | date_string_col string, 13 | string_col string, 14 | timestamp_col timestamp) 15 | PARTITIONED BY (year int, month int) 16 | ROW FORMAT delimited fields terminated by ',' escaped by '\\' 17 | STORED AS TEXTFILE 18 | LOCATION '/user/admin/alltypes/alltypes'; 19 | 20 | USE functional; 21 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=1); 22 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=2); 23 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=3); 24 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=4); 25 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=5); 26 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=6); 27 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=7); 28 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=8); 29 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=9); 30 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=10); 31 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=11); 32 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2009, month=12); 33 | 34 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=1); 35 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=2); 36 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=3); 37 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=4); 38 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=5); 39 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=6); 40 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=7); 41 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=8); 42 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=9); 43 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=10); 44 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=11); 45 | ALTER TABLE alltypes ADD IF NOT EXISTS PARTITION(year=2010, month=12); 46 | -------------------------------------------------------------------------------- /impala/sample_tables/alltypes.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/romainr/hadoop-tutorials-examples/a070880c7d44402997080d860bb49cc234dff879/impala/sample_tables/alltypes.zip 
-------------------------------------------------------------------------------- /impala/sample_tables/table_100_cols/create.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTERNAL TABLE IF NOT EXISTS alltypes_100 ( 2 | id_0 int COMMENT 'Add a comment', 3 | bool_col_0 boolean, 4 | tinyint_col_0 tinyint, 5 | smallint_col_0 smallint, 6 | int_col_0 int, 7 | bigint_col_0 bigint, 8 | float_col_0 float, 9 | double_col_0 double, 10 | date_string_col_0 string, 11 | string_col_0 string, 12 | timestamp_col_0 timestamp, 13 | id_1 int COMMENT 'Add a comment', 14 | bool_col_1 boolean, 15 | tinyint_col_1 tinyint, 16 | smallint_col_1 smallint, 17 | int_col_1 int, 18 | bigint_col_1 bigint, 19 | float_col_1 float, 20 | double_col_1 double, 21 | date_string_col_1 string, 22 | string_col_1 string, 23 | timestamp_col_1 timestamp, 24 | id_2 int COMMENT 'Add a comment', 25 | bool_col_2 boolean, 26 | tinyint_col_2 tinyint, 27 | smallint_col_2 smallint, 28 | int_col_2 int, 29 | bigint_col_2 bigint, 30 | float_col_2 float, 31 | double_col_2 double, 32 | date_string_col_2 string, 33 | string_col_2 string, 34 | timestamp_col_2 timestamp, 35 | id_3 int COMMENT 'Add a comment', 36 | bool_col_3 boolean, 37 | tinyint_col_3 tinyint, 38 | smallint_col_3 smallint, 39 | int_col_3 int, 40 | bigint_col_3 bigint, 41 | float_col_3 float, 42 | double_col_3 double, 43 | date_string_col_3 string, 44 | string_col_3 string, 45 | timestamp_col_3 timestamp, 46 | id_4 int COMMENT 'Add a comment', 47 | bool_col_4 boolean, 48 | tinyint_col_4 tinyint, 49 | smallint_col_4 smallint, 50 | int_col_4 int, 51 | bigint_col_4 bigint, 52 | float_col_4 float, 53 | double_col_4 double, 54 | date_string_col_4 string, 55 | string_col_4 string, 56 | timestamp_col_4 timestamp, 57 | id_5 int COMMENT 'Add a comment', 58 | bool_col_5 boolean, 59 | tinyint_col_5 tinyint, 60 | smallint_col_5 smallint, 61 | int_col_5 int, 62 | bigint_col_5 bigint, 63 | float_col_5 float, 64 | double_col_5 double, 65 | date_string_col_5 string, 66 | string_col_5 string, 67 | timestamp_col_5 timestamp, 68 | id_6 int COMMENT 'Add a comment', 69 | bool_col_6 boolean, 70 | tinyint_col_6 tinyint, 71 | smallint_col_6 smallint, 72 | int_col_6 int, 73 | bigint_col_6 bigint, 74 | float_col_6 float, 75 | double_col_6 double, 76 | date_string_col_6 string, 77 | string_col_6 string, 78 | timestamp_col_6 timestamp, 79 | id_7 int COMMENT 'Add a comment', 80 | bool_col_7 boolean, 81 | tinyint_col_7 tinyint, 82 | smallint_col_7 smallint, 83 | int_col_7 int, 84 | bigint_col_7 bigint, 85 | float_col_7 float, 86 | double_col_7 double, 87 | date_string_col_7 string, 88 | string_col_7 string, 89 | timestamp_col_7 timestamp, 90 | id_8 int COMMENT 'Add a comment', 91 | bool_col_8 boolean, 92 | tinyint_col_8 tinyint, 93 | smallint_col_8 smallint, 94 | int_col_8 int, 95 | bigint_col_8 bigint, 96 | float_col_8 float, 97 | double_col_8 double, 98 | date_string_col_8 string, 99 | string_col_8 string, 100 | timestamp_col_8 timestamp, 101 | id_9 int COMMENT 'Add a comment', 102 | bool_col_9 boolean, 103 | tinyint_col_9 tinyint, 104 | smallint_col_9 smallint, 105 | int_col_9 int, 106 | bigint_col_9 bigint, 107 | float_col_9 float, 108 | double_col_9 double, 109 | date_string_col_9 string, 110 | string_col_9 string, 111 | timestamp_col_9 timestamp, 112 | id_10 int COMMENT 'Add a comment', 113 | bool_col_10 boolean, 114 | tinyint_col_10 tinyint, 115 | smallint_col_10 smallint, 116 | int_col_10 int, 117 | bigint_col_10 bigint, 118 | float_col_10 float, 119 | 
double_col_10 double, 120 | date_string_col_10 string, 121 | string_col_10 string, 122 | timestamp_col_10 timestamp 123 | 124 | ) 125 | 126 | ROW FORMAT delimited fields terminated by ',' escaped by '\\' 127 | STORED AS TEXTFILE 128 | LOCATION '/user/admin/table_100'; 129 | -------------------------------------------------------------------------------- /impala/select_table.hql: -------------------------------------------------------------------------------- 1 | SELECT r.business_id, name, SUM(cool) AS coolness, '$date' as `date` 2 | FROM review r JOIN business b 3 | ON (r.business_id = b.business_id) 4 | WHERE categories LIKE '%Restaurants%' 5 | AND `date` = '$date' 6 | GROUP BY r.business_id, name 7 | ORDER BY coolness DESC 8 | LIMIT 10 9 | -------------------------------------------------------------------------------- /notebook/shared_rdd/README.md: -------------------------------------------------------------------------------- 1 | Blog 2 | ==== 3 | 4 | [How to use the Livy Spark REST Job Server API for sharing Spark RDDs and contexts](http://gethue.com/how-to-use-the-livy-spark-rest-job-server-api-for-sharing-spark-rdds-and-contexts/) 5 | 6 | Read more on: 7 | http://gethue.com/spark/ 8 | 9 | -------------------------------------------------------------------------------- /notebook/shared_rdd/shareable_rdd.py: -------------------------------------------------------------------------------- 1 | 2 | # Start a named RDD on a remote Livy PypSpark session that simulates a shared in memory key/value store. 3 | # To start in a Livy PySpark session. 4 | 5 | class ShareableRdd(): 6 | 7 | def __init__(self): 8 | self.data = sc.parallelize([]) 9 | 10 | def get(self, key): 11 | return self.data.filter(lambda row: row[0] == key).take(1)[0] 12 | 13 | def set(self, key, value): 14 | new_key = sc.parallelize([[key, value]]) 15 | self.data = self.data.union(new_key) 16 | 17 | 18 | a = ShareableRdd() 19 | 20 | a.set('ak', 'Alaska') 21 | a.set('ca', 'California') 22 | 23 | 24 | a.get('ak') 25 | 26 | -------------------------------------------------------------------------------- /notebook/shared_rdd/shared_rdd.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests\n", 12 | "import json\n", 13 | "\n", 14 | "\n", 15 | "class SharedRdd():\n", 16 | " \"\"\"\n", 17 | " Perform REST calls to a remote PySpark shell containing a Shared named RDD.\n", 18 | " \"\"\" \n", 19 | " def __init__(self, session_url, name):\n", 20 | " self.session_url = session_url\n", 21 | " self.name = name\n", 22 | " \n", 23 | " def get(self, key):\n", 24 | " return self._curl('%(rdd)s.get(\"%(key)s\")' % {'rdd': self.name, 'key': key})\n", 25 | " \n", 26 | " def set(self, key, value):\n", 27 | " return self._curl('%(rdd)s.set(\"%(key)s\", \"%(value)s\")' % {'rdd': self.name, 'key': key, 'value': value})\n", 28 | " \n", 29 | " def _curl(self, code):\n", 30 | " statements_url = self.session_url + '/statements'\n", 31 | " data = {'code': code}\n", 32 | " r = requests.post(statements_url, data=json.dumps(data), headers={'Content-Type': 'application/json'})\n", 33 | " resp = r.json()\n", 34 | " statement_id = str(resp['id'])\n", 35 | " while resp['state'] == 'running':\n", 36 | " r = requests.get(statements_url + '/' + statement_id)\n", 37 | " resp = r.json() \n", 38 | " if 'output' in resp:\n", 39 | " return 
resp['output']['data']\n", 40 | " else:\n", 41 | " return resp['data']\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": true 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "states = SharedRdd('http://localhost:8998/sessions/0', 'states')" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "print states.get('ak')\n", 64 | "print states.get('ca')" 65 | ] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python 2", 71 | "language": "python", 72 | "name": "python2" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 2 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython2", 84 | "version": "2.7.10" 85 | } 86 | }, 87 | "nbformat": 4, 88 | "nbformat_minor": 0 89 | } 90 | -------------------------------------------------------------------------------- /notebook/shared_rdd/shared_rdd.py: -------------------------------------------------------------------------------- 1 | 2 | # 3 | # Access a named RDD on a remote Livy PypSpark session that simulates a shared in memory key/value store. 4 | # To type in a regular Python shell. 5 | # Depends on: pip install requests 6 | # 7 | import requests 8 | import json 9 | 10 | 11 | class SharedRdd2(): 12 | 13 | def __init__(self, session_url, name): 14 | self.session_url = session_url 15 | self.name = name 16 | 17 | def get(self, key): 18 | return self._curl('%(rdd)s.get("%(key)s")' % {'rdd': self.name, 'key': key}) 19 | 20 | def set(self, key, value): 21 | return self._curl('%(rdd)s.set("%(key)s", "%(value)s")' % {'rdd': self.name, 'key': key, 'value': value}) 22 | 23 | def _curl(self, code): 24 | statements_url = self.session_url + '/statements' 25 | data = {'code': code} 26 | print data 27 | print statements_url 28 | r = requests.post(statements_url, data=json.dumps(data), headers={'Content-Type': 'application/json'}) 29 | resp = r.json() 30 | statement_id = str(resp['id']) 31 | while resp['state'] == 'running': 32 | r = requests.get(statements_url + '/' + statement_id) 33 | resp = r.json() 34 | if 'output' in resp: # Case Livy returns automatically 35 | return resp['output']['data'] 36 | else: 37 | return resp['data'] 38 | 39 | 40 | states = SharedRdd2('http://localhost:8998/sessions/0', 'states') 41 | 42 | 43 | states.get('ak') 44 | states.get('ca') 45 | -------------------------------------------------------------------------------- /oozie/credentials/hive-config.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 21 | 22 | 30 | 31 | 32 | hive.metastore.local 33 | false 34 | 35 | 36 | hive.metastore.uris 37 | thrift://test-cdh5-hue.ent.cloudera.com:9083 38 | 39 | 40 | hive.metastore.client.socket.timeout 41 | 300 42 | 43 | 44 | hive.metastore.warehouse.dir 45 | /user/hive/warehouse 46 | 47 | 48 | hive.warehouse.subdir.inherit.perms 49 | true 50 | 51 | 52 | mapred.reduce.tasks 53 | -1 54 | 55 | 56 | hive.exec.reducers.bytes.per.reducer 57 | 1073741824 58 | 59 | 60 | hive.exec.reducers.max 61 | 999 62 | 63 | 64 | hive.metastore.execute.setugi 65 | true 66 | 67 | 68 | hive.support.concurrency 69 | true 70 | 71 | 72 | hive.zookeeper.quorum 73 | test-cdh5-hue.ent.cloudera.com 74 | 75 | 76 | hive.zookeeper.client.port 77 | 2181 78 | 79 | 
80 | 81 | 82 | hive.zookeeper.namespace 83 | hive_zookeeper_namespace_HIVE-1 84 | 85 | 86 | hive.server2.enable.doAs 87 | true 88 | 89 | 90 | fs.hdfs.impl.disable.cache 91 | true 92 | 93 | 94 | hive.metastore.sasl.enabled 95 | true 96 | 97 | 98 | hive.server2.authentication 99 | kerberos 100 | 101 | 102 | hive.metastore.kerberos.principal 103 | hive/_HOST@ENT.CLOUDERA.COM 104 | 105 | 106 | hive.server2.authentication.kerberos.principal 107 | hive/_HOST@ENT.CLOUDERA.COM 108 | 109 | 110 | hive.stats.dbclass 111 | jdbc:mysql 112 | 113 | 114 | hive.stats.jdbcdriver 115 | com.mysql.jdbc.Driver 116 | 117 | 118 | hive.stats.dbconnectionstring 119 | jdbc:mysql://test-cdh5-hue.ent.cloudera.com:3306/hive1?user=hive1&password=hive1 120 | 121 | 122 | hive.aux.jars.path 123 | file:///usr/share/java//mysql-connector-java.jar 124 | 125 | 126 | hbase.zookeeper.quorum 127 | test-cdh5-hue.ent.cloudera.com 128 | 129 | 130 | hbase.zookeeper.property.clientPort 131 | 2181 132 | 133 | 134 | -------------------------------------------------------------------------------- /oozie/credentials/hive.sql: -------------------------------------------------------------------------------- 1 | show tables; 2 | 3 | select count(*) from sample_07; -------------------------------------------------------------------------------- /oozie/credentials/workflow.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | hcat.metastore.uri 7 | thrift://test-cdh5-hue.ent.cloudera.com:9083 8 | 9 | 10 | hcat.metastore.principal 11 | hive/test-cdh5-hue.ent.cloudera.com@ENT.CLOUDERA.COM 12 | 13 | 14 | 15 | 16 | 17 | 18 | ${jobTracker} 19 | ${nameNode} 20 | hive-config.xml 21 | 22 | hive-config.xml#hive-config.xml 23 | 24 | 25 | 26 | 27 | 28 | Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /oozie/el-functions/rkanter/MyELFunctions.java: -------------------------------------------------------------------------------- 1 | package rkanter; 2 | 3 | public class MyELFunctions { 4 | /** 5 | * Compares two strings while ignoring case. 6 | * 7 | * @param s1 first string. 8 | * @param s2 second string. 
9 | * @return true if s1 and s2 are equal, regardless of case, and false otherwise 10 | */ 11 | public static boolean equalsIgnoreCase(String s1, String s2) { 12 | if (s1 == null) { 13 | return (s2 == null); 14 | } 15 | return s1.equalsIgnoreCase(s2); 16 | } 17 | } -------------------------------------------------------------------------------- /oozie/hiveserver2-action/hive-site3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hive.metastore.local 6 | false 7 | 8 | 9 | hive.metastore.uris 10 | thrift://test-cdh5-hue.ent.cloudera.com:9083 11 | 12 | 13 | hive.metastore.client.socket.timeout 14 | 300 15 | 16 | 17 | hive.metastore.warehouse.dir 18 | /user/hive/warehouse 19 | 20 | 21 | hive.warehouse.subdir.inherit.perms 22 | true 23 | 24 | 25 | mapred.reduce.tasks 26 | -1 27 | 28 | 29 | hive.exec.reducers.bytes.per.reducer 30 | 1073741824 31 | 32 | 33 | hive.exec.reducers.max 34 | 999 35 | 36 | 37 | hive.metastore.execute.setugi 38 | true 39 | 40 | 41 | hive.support.concurrency 42 | true 43 | 44 | 45 | hive.zookeeper.quorum 46 | test-cdh5-hue.ent.cloudera.com 47 | 48 | 49 | hive.zookeeper.client.port 50 | 2181 51 | 52 | 53 | 54 | 55 | hive.zookeeper.namespace 56 | hive_zookeeper_namespace_HIVE-1 57 | 58 | 59 | hive.server2.enable.doAs 60 | true 61 | 62 | 63 | fs.hdfs.impl.disable.cache 64 | true 65 | 66 | 67 | hive.metastore.sasl.enabled 68 | true 69 | 70 | 71 | hive.server2.authentication 72 | kerberos 73 | 74 | 75 | hive.metastore.kerberos.principal 76 | hive/_HOST@ENT.CLOUDERA.COM 77 | 78 | 79 | hive.server2.authentication.kerberos.principal 80 | hive/_HOST@ENT.CLOUDERA.COM 81 | 82 | 83 | hive.stats.dbclass 84 | jdbc:mysql 85 | 86 | 87 | hive.stats.jdbcdriver 88 | com.mysql.jdbc.Driver 89 | 90 | 91 | hive.stats.dbconnectionstring 92 | jdbc:mysql://test-cdh5-hue.ent.cloudera.com:3306/hive1?user=hive1&password=hive1 93 | 94 | 95 | hive.aux.jars.path 96 | file:///usr/share/java//mysql-connector-java.jar 97 | 98 | 99 | hbase.zookeeper.quorum 100 | test-cdh5-hue.ent.cloudera.com 101 | 102 | 103 | hbase.zookeeper.property.clientPort 104 | 2181 105 | 106 | 107 | -------------------------------------------------------------------------------- /oozie/hiveserver2-action/select_genericl.sql: -------------------------------------------------------------------------------- 1 | !connect ${connectString} systest ${hive.password} org.apache.hive.jdbc.HiveDriver 2 | select count(*) from ${tableName}; 3 | -------------------------------------------------------------------------------- /oozie/hiveserver2-action/workflow.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | hive2.jdbc.url 7 | jdbc:hive2://test-cdh5-hue.ent.cloudera.com:10000/default 8 | 9 | 10 | hive2.server.principal 11 | hive/test-cdh5-hue.ent.cloudera.com@ENT.CLOUDERA.COM 12 | 13 | 14 | 15 | 16 | 17 | 18 | ${jobTracker} 19 | ${nameNode} 20 | hive-site3.xml 21 | 22 | 23 | oozie.action.sharelib.for.hive 24 | hive2 25 | 26 | 27 | oozie.launcher.action.main.class 28 | org.apache.oozie.action.hadoop.Hive2Main 29 | 30 | 31 | 32 | connectString=${connectString} 33 | tableName=${tableName} 34 | 35 | 36 | 37 | 38 | 39 | Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /oozie/workflow_demo/bundle.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 
oozie.use.system.libpath 7 | true 8 | 9 | 10 | 11 | 2013-06-01T00:00Z 12 | 13 | 14 | ${nameNode}/user/hue/oozie/deployments/_romain_-oozie-827-1384552268.08 15 | 16 | 17 | wf_application_path 18 | hdfs://localhost:8020/user/hue/oozie/deployments/_romain_-oozie-807-1384552240.98 19 | 20 | 21 | 22 | 23 | ${nameNode}/user/hue/oozie/deployments/_romain_-oozie-828-1384552298.63 24 | 25 | 26 | wf_application_path 27 | hdfs://localhost:8020/user/hue/oozie/deployments/_romain_-oozie-806-1384552283.32 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /oozie/workflow_demo/coordinator.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 8 | /user/romain/words/201207${DAY} 9 | 10 | 11 | 12 | 13 | 14 | ${coord:current(0)} 15 | 16 | 17 | 18 | 19 | ...workflow.xml 20 | 21 | 22 | input 23 | ${coord:dataOut('DAILY_WORDS')} 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /oozie/workflow_demo/job.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Hadoop tutorials project 3 | # 4 | 5 | input=/user/hue/pig/examples/data/midsummer.txt 6 | 7 | oozie.use.system.libpath=true 8 | 9 | nameNode=hdfs://localhost:8020 10 | jobTracker=localhost:8021 11 | queueName=default 12 | examplesRoot=examples 13 | 14 | oozie.wf.application.path=/user/romain/examples/workflow_demo 15 | outputDir=map-reduce 16 | -------------------------------------------------------------------------------- /oozie/workflow_demo/lib/piggybank.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/romainr/hadoop-tutorials-examples/a070880c7d44402997080d860bb49cc234dff879/oozie/workflow_demo/lib/piggybank.jar -------------------------------------------------------------------------------- /oozie/workflow_demo/script.pig: -------------------------------------------------------------------------------- 1 | REGISTER piggybank.jar; 2 | 3 | data = LOAD '$input' as (text:CHARARRAY); 4 | upper_case = FOREACH data GENERATE org.apache.pig.piggybank.evaluation.string.UPPER(text); 5 | 6 | STORE upper_case INTO '$output' ; 7 | -------------------------------------------------------------------------------- /oozie/workflow_demo/workflow.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ${jobTracker} 6 | ${nameNode} 7 | 8 | -param 9 | input=${input} 10 | -param 11 | output=${output} 12 | 13 | 14 | 15 | 16 | 17 | Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] 18 | 19 | 20 | -------------------------------------------------------------------------------- /pig-json-python-udf/README.md: -------------------------------------------------------------------------------- 1 | Blog URL 2 | ======== 3 | 4 | [Prepare the data for analysis with Pig and Python UDF](http://gethue.com/hadoop-tutorials-ii-1-prepare-the-data-for-analysis/) 5 | -------------------------------------------------------------------------------- /pig-json-python-udf/clean_json.pig: -------------------------------------------------------------------------------- 1 | -- funny, useful, cool,user_id, review_id, stars, text, date, type, business_id 2 | 3 | reviews = load 'yelp_academic_dataset_review.json' 4 | using JsonLoader('votes:map[],user_id:chararray,review_id:chararray,stars:int,date:chararray,text:chararray,type:chararray,business_id:chararray'); 
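-- 'votes' is loaded as a Pig map, so votes#'funny' etc. below pull the individual vote
-- counts out of it; the (INT) casts turn them into plain integer columns, and the nested
-- REPLACE calls strip newlines and tabs from the review text so each record stays on one
-- line in the tab-separated output.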
5 | 6 | tabs = FOREACH reviews generate 7 | (INT) votes#'funny', (INT) votes#'useful', (INT) votes#'cool', user_id, review_id, stars, REPLACE(REPLACE(text, '\n', ''), '\t', ''), date, type, business_id; 8 | 9 | STORE tabs INTO 'yelp_academic_dataset_review.tsv'; 10 | 11 | -------------------------------------------------------------------------------- /pig-json-python-udf/converter.py: -------------------------------------------------------------------------------- 1 | from com.xhaus.jyson import JysonCodec as json 2 | 3 | @outputSchema("business:chararray") 4 | def tsvify(line): 5 | business_json = json.loads(line) 6 | business = map(unicode, business_json.values()) 7 | return '\t'.join(business).replace('\n', ' ').encode('utf-8') 8 | 9 | -------------------------------------------------------------------------------- /pig-json-python-udf/python_udf.pig: -------------------------------------------------------------------------------- 1 | -- city, review_count, name, neighborhoods, type, business_id, full_address, state, longitude, stars, latitude, open, categories 2 | 3 | REGISTER 'converter.py' USING jython AS converter; 4 | 5 | 6 | reviews = LOAD 'yelp_academic_dataset_business.json' AS (line:CHARARRAY); 7 | 8 | tsv = FOREACH reviews GENERATE converter.tsvify(line); 9 | 10 | STORE tsv INTO 'yelp_academic_dataset_business.tsv'; 11 | 12 | -------------------------------------------------------------------------------- /search/indexing/README.md: -------------------------------------------------------------------------------- 1 | Blog URL 2 | ======== 3 | 4 | Dynamic search dashboards with Solr 5 | http://gethue.com/hadoop-search-dynamic-search-dashboards-with-solr/ 6 | 7 | How to index 8 | http://gethue.com/index-and-search-data-with-hadoop-and-solr 9 | 10 | -------------------------------------------------------------------------------- /search/indexing/apache_logs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Parses Apache Log files into a CSV data file ready to be indexed by Solr. 4 | 5 | Input format: 6 | demo.gethue.com:80 49.206.186.56 - - [04/May/2014:07:38:53 +0000] "GET /oozie/?format=json&type=running HTTP/1.1" 200 324 "http://demo.gethue.com/oozie/" "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.132 Safari/537.36" 7 | 8 | Requires these libraries: 9 | pip install pyyaml ua-parser 10 | https://github.com/tobie/ua-parser 11 | 12 | pip install pygeoip 13 | https://github.com/appliedsec/pygeoip 14 | 15 | Download http://geolite.maxmind.com/download/geoip/database/GeoLiteCity.dat.gz 16 | from http://dev.maxmind.com/geoip/legacy/geolite/ 17 | 18 | Script is quick & dirty and given as an example. 19 | """ 20 | 21 | import csv 22 | import re 23 | import uuid 24 | 25 | import pygeoip 26 | 27 | from datetime import datetime 28 | from ua_parser import user_agent_parser 29 | 30 | 31 | INPUT_LOGS = 'other_vhosts_access.log' 32 | OUTPUT_CSV = 'index_data.csv' 33 | MAX_ROWS = 1000 34 | 35 | LINE_RE = re.compile('(?P.+?) (?P[(\d\.)]+) - - \[(?P