├── .DS_Store
├── .project
├── .settings
├── org.eclipse.core.resources.prefs
└── org.eclipse.m2e.core.prefs
├── Dockerfile
├── JPA-Access
├── .classpath
├── .project
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── pom.xml
├── src
│ ├── META-INF
│ │ └── persistence.xml
│ ├── main
│ │ └── java
│ │ │ ├── com
│ │ │ └── rts
│ │ │ │ └── mysql
│ │ │ │ ├── dao
│ │ │ │ └── Result.java
│ │ │ │ └── util
│ │ │ │ ├── DaoUtil.java
│ │ │ │ └── EMfactory.java
│ │ │ └── org
│ │ │ └── JPA
│ │ │ └── Access
│ │ │ └── App.java
│ └── test
│ │ └── java
│ │ └── org
│ │ └── JPA
│ │ └── Access
│ │ └── AppTest.java
└── target
│ ├── classes
│ ├── META-INF
│ │ ├── MANIFEST.MF
│ │ └── maven
│ │ │ └── com.rts.scrap
│ │ │ └── JPA-Access
│ │ │ ├── pom.properties
│ │ │ └── pom.xml
│ ├── com
│ │ └── rts
│ │ │ └── mysql
│ │ │ ├── dao
│ │ │ └── Result.class
│ │ │ └── util
│ │ │ ├── DaoUtil.class
│ │ │ └── EMfactory.class
│ └── org
│ │ └── JPA
│ │ └── Access
│ │ └── App.class
│ └── test-classes
│ └── org
│ └── JPA
│ └── Access
│ └── AppTest.class
├── License
├── RTS.db
├── RTS.log
├── RTTM_Logo.png
├── Readme.md
├── architecture.png
├── docs
├── .DS_Store
├── Makefile
├── build
│ ├── doctrees
│ │ ├── apiKeySetUp.doctree
│ │ ├── architecture.doctree
│ │ ├── configuration.doctree
│ │ ├── contents.doctree
│ │ ├── contributors.doctree
│ │ ├── debugging.doctree
│ │ ├── environment.pickle
│ │ ├── index.doctree
│ │ ├── intro.doctree
│ │ ├── license.doctree
│ │ ├── roadmap.doctree
│ │ └── setupGuide.doctree
│ └── html
│ │ ├── .buildinfo
│ │ ├── _images
│ │ ├── RTTM_Logo.png
│ │ └── architecture.png
│ │ ├── _sources
│ │ ├── apiKeySetUp.md.txt
│ │ ├── architecture.md.txt
│ │ ├── configuration.md.txt
│ │ ├── contents.rst.txt
│ │ ├── contributors.md.txt
│ │ ├── debugging.md.txt
│ │ ├── index.rst.txt
│ │ ├── intro.md.txt
│ │ ├── license.md.txt
│ │ ├── roadmap.md.txt
│ │ └── setupGuide.md.txt
│ │ ├── _static
│ │ ├── RTTM_Logo.png
│ │ ├── alabaster.css
│ │ ├── architecture.png
│ │ ├── basic.css
│ │ ├── css
│ │ │ ├── badge_only.css
│ │ │ └── theme.css
│ │ ├── custom.css
│ │ ├── doctools.js
│ │ ├── documentation_options.js
│ │ ├── file.png
│ │ ├── fonts
│ │ │ ├── Inconsolata-Bold.ttf
│ │ │ ├── Inconsolata-Regular.ttf
│ │ │ ├── Inconsolata.ttf
│ │ │ ├── Lato-Bold.ttf
│ │ │ ├── Lato-Regular.ttf
│ │ │ ├── Lato
│ │ │ │ ├── lato-bold.eot
│ │ │ │ ├── lato-bold.ttf
│ │ │ │ ├── lato-bold.woff
│ │ │ │ ├── lato-bold.woff2
│ │ │ │ ├── lato-bolditalic.eot
│ │ │ │ ├── lato-bolditalic.ttf
│ │ │ │ ├── lato-bolditalic.woff
│ │ │ │ ├── lato-bolditalic.woff2
│ │ │ │ ├── lato-italic.eot
│ │ │ │ ├── lato-italic.ttf
│ │ │ │ ├── lato-italic.woff
│ │ │ │ ├── lato-italic.woff2
│ │ │ │ ├── lato-regular.eot
│ │ │ │ ├── lato-regular.ttf
│ │ │ │ ├── lato-regular.woff
│ │ │ │ └── lato-regular.woff2
│ │ │ ├── RobotoSlab-Bold.ttf
│ │ │ ├── RobotoSlab-Regular.ttf
│ │ │ ├── RobotoSlab
│ │ │ │ ├── roboto-slab-v7-bold.eot
│ │ │ │ ├── roboto-slab-v7-bold.ttf
│ │ │ │ ├── roboto-slab-v7-bold.woff
│ │ │ │ ├── roboto-slab-v7-bold.woff2
│ │ │ │ ├── roboto-slab-v7-regular.eot
│ │ │ │ ├── roboto-slab-v7-regular.ttf
│ │ │ │ ├── roboto-slab-v7-regular.woff
│ │ │ │ └── roboto-slab-v7-regular.woff2
│ │ │ ├── fontawesome-webfont.eot
│ │ │ ├── fontawesome-webfont.svg
│ │ │ ├── fontawesome-webfont.ttf
│ │ │ ├── fontawesome-webfont.woff
│ │ │ └── fontawesome-webfont.woff2
│ │ ├── jquery-3.4.1.js
│ │ ├── jquery.js
│ │ ├── js
│ │ │ ├── modernizr.min.js
│ │ │ └── theme.js
│ │ ├── language_data.js
│ │ ├── minus.png
│ │ ├── plus.png
│ │ ├── pygments.css
│ │ ├── searchtools.js
│ │ ├── underscore-1.3.1.js
│ │ └── underscore.js
│ │ ├── apiKeySetUp.html
│ │ ├── architecture.html
│ │ ├── configuration.html
│ │ ├── contents.html
│ │ ├── contributors.html
│ │ ├── debugging.html
│ │ ├── genindex.html
│ │ ├── index.html
│ │ ├── intro.html
│ │ ├── license.html
│ │ ├── objects.inv
│ │ ├── roadmap.html
│ │ ├── search.html
│ │ ├── searchindex.js
│ │ └── setupGuide.html
├── make.bat
└── source
│ ├── .DS_Store
│ ├── _static
│ ├── RTTM_Logo.png
│ └── architecture.png
│ ├── apiKeySetUp.md
│ ├── architecture.md
│ ├── conf.py
│ ├── configuration.md
│ ├── contents.rst
│ ├── contributors.md
│ ├── debugging.md
│ ├── index.rst
│ ├── intro.md
│ ├── license.md
│ ├── roadmap.md
│ └── setupGuide.md
├── kafka-parser
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── html-mail-template.ftl
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── org
│ │ │ └── kafkaparser
│ │ │ ├── base
│ │ │ ├── NotificationConsumerGroup.java
│ │ │ ├── NotificationConsumerThread.java
│ │ │ └── Producer.java
│ │ │ ├── deseralize
│ │ │ └── DataDeserializer.java
│ │ │ ├── pojo
│ │ │ └── Data.java
│ │ │ ├── serialize
│ │ │ └── DataSerializer.java
│ │ │ └── utilities
│ │ │ ├── ConfigData.java
│ │ │ ├── ConfigParams.java
│ │ │ ├── DbUtil.java
│ │ │ ├── EmailUtility.java
│ │ │ ├── Git.java
│ │ │ ├── HttpUtilities.java
│ │ │ ├── PastieParseAndSearch.java
│ │ │ ├── Search.java
│ │ │ ├── SearchThread.java
│ │ │ └── TruffleHog.java
│ └── resources
│ │ └── html-mail-template.ftl
│ └── test
│ └── java
│ └── org
│ └── kafka
│ └── parser
│ └── AppTest.java
├── pom.xml
├── rts-base
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── pom.xml
└── src
│ ├── main
│ └── java
│ │ └── org
│ │ └── rts
│ │ └── base
│ │ ├── Scrapper.java
│ │ ├── ScrapperImpl.java
│ │ ├── ScrapperProfile.java
│ │ ├── exceptions
│ │ └── ScrapperNotvalidException.java
│ │ ├── profileregistry
│ │ └── ScrapperProfileRegistry.java
│ │ └── utilities
│ │ └── PropertyUtilities.java
│ └── test
│ └── java
│ └── org
│ └── rts
│ └── base
│ └── AppTest.java
├── rts-impl
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── org
│ │ │ └── rts
│ │ │ ├── impl
│ │ │ ├── GithubImpl.java
│ │ │ ├── PastieImpl.java
│ │ │ ├── RedditImpl.java
│ │ │ └── TwitterImpl.java
│ │ │ ├── rtsprofile
│ │ │ ├── CodepadProfile.java
│ │ │ ├── Dumpz.java
│ │ │ ├── GistGithubProfile.java
│ │ │ ├── GithubProfile.java
│ │ │ ├── IdeonecomProfile.java
│ │ │ ├── KpastenetProfile.java
│ │ │ ├── Lpaste.java
│ │ │ ├── PastebincaProfile.java
│ │ │ ├── PastebinfrProfile.java
│ │ │ ├── PastebinruProfile.java
│ │ │ ├── Pasteorgru.java
│ │ │ ├── PastieProfile.java
│ │ │ ├── RedditProfile.java
│ │ │ ├── SlexyOrgProfile.java
│ │ │ ├── Snipplr.java
│ │ │ └── TwitterProfile.java
│ │ │ └── utilities
│ │ │ ├── Difference.java
│ │ │ ├── JsonParserForGithub.java
│ │ │ ├── JsonParserForReddit.java
│ │ │ └── TruffleHog.java
│ └── resources
│ │ ├── META-INF
│ │ └── services
│ │ │ └── org.rts.base.ScrapperProfile
│ │ └── log4j.properties
│ └── test
│ └── java
│ └── org
│ └── rts
│ └── impl
│ └── AppTest.java
├── scrapper_config
├── consumer.properties
├── email.properties
├── global.properties
├── html-mail-template.ftl
├── look for ssrf via host header
├── producer.properties
├── proxy.properties
├── scanner-configuration.properties
└── useragents-list.txt
├── scraptool
├── .classpath
├── .gitignore
├── .project
├── .settings
│ ├── org.eclipse.core.resources.prefs
│ ├── org.eclipse.jdt.core.prefs
│ └── org.eclipse.m2e.core.prefs
├── RTS.db
├── RTS.log
├── pom.xml
└── src
│ ├── main
│ └── java
│ │ └── org
│ │ └── scraptool
│ │ └── ScrapperTool.java
│ └── test
│ └── java
│ └── org
│ └── scraptool
│ └── AppTest.java
├── script
├── .DS_Store
├── cleanup.sh
├── db_setup.sh
└── initialize.sh
└── sqlite-dataaccess
├── .classpath
├── .gitignore
├── .project
├── .settings
├── org.eclipse.core.resources.prefs
├── org.eclipse.jdt.core.prefs
└── org.eclipse.m2e.core.prefs
├── pom.xml
└── src
├── main
├── java
│ └── org
│ │ └── sqlite
│ │ └── dataaccess
│ │ ├── entity
│ │ ├── Result.java
│ │ └── SearchItem.java
│ │ └── util
│ │ ├── DaoUtil.java
│ │ ├── EMfactory.java
│ │ └── SQLiteDialect.java
└── resources
│ ├── META-INF
│ └── persistence.xml
│ ├── import.sql
│ └── log4j.properties
└── test
└── java
└── org
└── sqlite
└── dataaccess
└── AppTest.java
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/.DS_Store
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | rts
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.m2e.core.maven2Builder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.m2e.core.maven2Nature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding/=UTF-8
3 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM openjdk:8

ENV DEBIAN_FRONTEND noninteractive

# Install all OS packages in a single layer (smaller image, one apt cache
# cleanup). NOTE: the original "apt-get install git" was missing -y and
# would hang/fail in a non-interactive build.
RUN apt-get update \
    && apt-get install -y \
        maven \
        git \
        mysql-server \
        zookeeper \
        wget \
        dnsutils \
        vim \
    && rm -rf /var/lib/apt/lists/*

RUN git config --global user.email "test@rttm.com"

ENV KAFKA_VERSION 2.1.1
ENV SCALA_VERSION 2.11
# archive.apache.org retains every released Kafka version; rotating mirrors
# (like the original spacedump.net one) drop old releases and 404.
RUN wget -q \
    https://archive.apache.org/dist/kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz \
    -O /tmp/kafka.tgz \
    && tar xfz /tmp/kafka.tgz -C /opt \
    && rm /tmp/kafka.tgz \
    && mv /opt/kafka_${SCALA_VERSION}-${KAFKA_VERSION} /opt/kafka

WORKDIR /opt/RTTM

RUN git clone https://github.com/NaveenRudra/RTTM.git

RUN git clone https://github.com/dxa4481/truffleHog.git

# Bootstrap pip for the truffleHog install below.
RUN wget https://bootstrap.pypa.io/get-pip.py \
    && python get-pip.py \
    && rm get-pip.py

WORKDIR /opt/RTTM/truffleHog

RUN pip install -r requirements.txt \
    && python setup.py install

WORKDIR /opt/RTTM/RTTM

# -DskipTests must not contain a space after -D.
RUN mvn install -DskipTests
--------------------------------------------------------------------------------
/JPA-Access/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/JPA-Access/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | JPA-Access
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.jdt.core.javanature
21 | org.eclipse.m2e.core.maven2Nature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/JPA-Access/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/test/java=UTF-8
4 | encoding/=UTF-8
5 |
--------------------------------------------------------------------------------
/JPA-Access/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
3 | org.eclipse.jdt.core.compiler.compliance=1.8
4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5 | org.eclipse.jdt.core.compiler.release=disabled
6 | org.eclipse.jdt.core.compiler.source=1.8
7 |
--------------------------------------------------------------------------------
/JPA-Access/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/JPA-Access/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.rts.scrap
7 | rts
8 | 1.0-SNAPSHOT
9 |
10 | com.rts.scrap
11 | JPA-Access
12 | 1.0-SNAPSHOT
13 | JPA-Access
14 | http://maven.apache.org
15 |
16 | UTF-8
17 |
18 |
19 |
20 | junit
21 | junit
22 | 3.8.1
23 | test
24 |
25 |
26 | javax.persistence
27 | persistence-api
28 | 1.0.2
29 | provided
30 |
31 |
32 | javax.transaction
33 | javax.transaction-api
34 | 1.2
35 | provided
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/JPA-Access/src/META-INF/persistence.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | de.vogella.jpa.simple.model.Todo
7 |
8 |
9 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/JPA-Access/src/main/java/com/rts/mysql/dao/Result.java:
--------------------------------------------------------------------------------
1 | package com.rts.mysql.dao;
2 |
3 | import java.io.Serializable;
4 | import java.util.ArrayList;
5 |
6 | import javax.persistence.Column;
7 | import javax.persistence.Entity;
8 | import javax.persistence.GeneratedValue;
9 | import javax.persistence.GenerationType;
10 | import javax.persistence.Id;
11 |
12 | /**
13 | *
14 | * @author Josue R G Junior josueribeiro.jr@gmail.com
15 | */
16 | @Entity
17 | public class Result implements Serializable {
18 |
19 | private static final long serialVersionUID = -7250234396452258822L;
20 |
21 | @Id
22 | @Column(name = "id_scrapper")
23 | @GeneratedValue(strategy = GenerationType.AUTO)
24 | private Integer id;
25 | private String url;
26 | private String time;
27 | private String searchedtext;
28 |
29 | @ManytoMany(fetch = FetchType.LAZY, cascade = CascadeType.ALL)
30 | @JoinTable(name = "result_search_item", joinColumns = {
31 | @JoinColumn(name = "url", nullable = false, updatable = false) }, inverseJoinColumns = {
32 | @JoinColumn(name = "id", nullable = false, updatable = false) })
33 | private Set searchedTerms;
34 |
35 | public String getBotName() {
36 | return botName;
37 | }
38 |
39 | public void setBotName(String botName) {
40 | this.botName = botName;
41 | }
42 |
43 | private String botName;
44 |
45 | // add one extra column from future perspective
46 | // add one extra column if it is false or true
47 | public ArrayList getSearchedTerms() {
48 | return searchedTerms;
49 | }
50 |
51 | public void setSearchedTerms(Set searchedTerms) {
52 | this.searchedTerms = searchedTerms;
53 | }
54 |
55 | public String getUrl() {
56 | return url;
57 | }
58 |
59 | public void setUrl(String url) {
60 | this.url = url;
61 | }
62 |
63 | public String getTime() {
64 | return time;
65 | }
66 |
67 | public void setTime(String time) {
68 | this.time = time;
69 | }
70 |
71 | public String getSearchedtext() {
72 | return searchedtext;
73 | }
74 |
75 | public void setSearchedtext(String searchedtext) {
76 | this.searchedtext = searchedtext;
77 | }
78 |
79 | public Integer getId() {
80 | return id;
81 | }
82 |
83 | public void setId(Integer id) {
84 | this.id = id;
85 | }
86 |
87 | }
--------------------------------------------------------------------------------
/JPA-Access/src/main/java/com/rts/mysql/util/DaoUtil.java:
--------------------------------------------------------------------------------
1 | package com.rts.mysql.util;
2 |
3 | import java.text.DateFormat;
4 | import java.text.SimpleDateFormat;
5 | import java.util.ArrayList;
6 | import java.util.Date;
7 | import javax.transaction.Transactional;
8 |
9 | import com.rts.mysql.dao.Result;
10 |
11 | public class DaoUtil {
12 |
13 | static
14 | {
15 | EMfactory.setUp();
16 | EMfactory.initEntityManager();
17 | }
18 |
19 | @Transactional
20 | public synchronized static void insert(Result data)
21 | {
22 | EMfactory.em.getTransaction().begin();
23 | EMfactory.em.persist(data);
24 | EMfactory.em.getTransaction().commit();
25 |
26 | }
27 |
28 | @Transactional
29 | public synchronized static boolean searchDuplicateByUrl(String url)
30 | {
31 | //System.out.println("In db url is : "+url);
32 | TypedQuery query = EMfactory.em.createQuery(
33 | "SELECT result FROM Result result where result.url='"+url+"'" , Result.class);
34 | ArrayList results = (ArrayList) query.getResultList();
35 |
36 | //System.out.println("query size :"+Integer.toString(results.size()));
37 | if(results.size()>0)
38 | {
39 | return true;
40 | }
41 | return false;
42 | }
43 |
44 | public static void main (String [] args)
45 | {
46 | // Result person = new Result();
47 | // ArrayList test=new ArrayList<>();
48 | // test.add("asd");
49 | // person.setSearchedTerms(test);
50 | // person.setSearchedtext("some lines up and down man");
51 | // person.setUrl("http://google.com4");
52 | // DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
53 | // Date date = new Date();
54 | // person.setTime(dateFormat.format(date).toString());
55 | System.out.println("Stated intializing*****************************************************************************");
56 | for (int i=0;i<3;i++)
57 | {
58 | if(searchDuplicateByUrl("http://google.com71"))
59 | {
60 | System.out.println(Integer.toString(i) +" *************************-----found");
61 | System.out.println(Integer.toString(i) +" *************************-----found");
62 | System.out.println(Integer.toString(i) +" *************************-----found");
63 | }
64 |
65 | else
66 | {
67 | System.out.println(Integer.toString(i) +" *************************-----not found");
68 | System.out.println(Integer.toString(i) +" *************************-----not found");
69 | System.out.println(Integer.toString(i) +" *************************-----not found");
70 |
71 | }
72 |
73 | Result person = new Result();
74 | ArrayList test=new ArrayList<>();
75 | test.add("asd");
76 | person.setSearchedTerms(test);
77 | person.setSearchedtext("some lines up and down man");
78 | person.setUrl("http://google.com71");
79 | DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
80 | Date date = new Date();
81 | person.setTime(dateFormat.format(date).toString());
82 | DaoUtil.insert(person);
83 | Result person1 = new Result();
84 | ArrayList test1=new ArrayList<>();
85 | test1.add("asdq");
86 | person1.setSearchedTerms(test);
87 | person1.setSearchedtext("some lines up and down man");
88 | person1.setUrl("http://google.com712");
89 | DaoUtil.insert(person1);
90 |
91 |
92 | }
93 | //EMfactory.em.persist(person);
94 | //EMfactory.em.getTransaction().commit();
95 |
96 |
97 | }
98 | }
99 |
--------------------------------------------------------------------------------
/JPA-Access/src/main/java/com/rts/mysql/util/EMfactory.java:
--------------------------------------------------------------------------------
1 | package com.rts.mysql.util;
2 |
3 | import javax.persistence.Persistence;
4 | import javax.persistence.EntityManager;
5 | import javax.persistence.EntityManagerFactory;
6 |
7 | public class EMfactory {
8 |
9 | public static EntityManagerFactory emf;
10 | public static EntityManager em;
11 |
12 | public static void setUp() {
13 | emf = Persistence.createEntityManagerFactory("sqlite-dataAccess");
14 | }
15 |
16 | public static void initEntityManager() {
17 | em = emf.createEntityManager();
18 |
19 |
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/JPA-Access/src/main/java/org/JPA/Access/App.java:
--------------------------------------------------------------------------------
package org.JPA.Access;

/**
 * Placeholder entry point generated by the Maven archetype; prints a
 * greeting to standard output.
 */
public class App {

    /**
     * Program entry point.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println("Hello World!");
    }
}
--------------------------------------------------------------------------------
/JPA-Access/src/test/java/org/JPA/Access/AppTest.java:
--------------------------------------------------------------------------------
1 | package org.JPA.Access;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigourous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/JPA-Access/target/classes/META-INF/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Built-By: n0r00ij
3 | Build-Jdk: 11.0.2
4 | Created-By: Maven Integration for Eclipse
5 |
6 |
--------------------------------------------------------------------------------
/JPA-Access/target/classes/META-INF/maven/com.rts.scrap/JPA-Access/pom.properties:
--------------------------------------------------------------------------------
1 | #Generated by Maven Integration for Eclipse
2 | #Wed Aug 14 20:21:01 IST 2019
3 | m2e.projectLocation=/Users/n0r00ij/Documents/GitHub/RTS/JPA-Access
4 | m2e.projectName=JPA-Access
5 | groupId=com.rts.scrap
6 | artifactId=JPA-Access
7 | version=1.0-SNAPSHOT
8 |
--------------------------------------------------------------------------------
/JPA-Access/target/classes/META-INF/maven/com.rts.scrap/JPA-Access/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.rts.scrap
7 | rts
8 | 1.0-SNAPSHOT
9 |
10 | com.rts.scrap
11 | JPA-Access
12 | 1.0-SNAPSHOT
13 | JPA-Access
14 | http://maven.apache.org
15 |
16 | UTF-8
17 |
18 |
19 |
20 | junit
21 | junit
22 | 3.8.1
23 | test
24 |
25 |
26 | javax.persistence
27 | persistence-api
28 | 1.0.2
29 | provided
30 |
31 |
32 | javax.transaction
33 | javax.transaction-api
34 | 1.2
35 | provided
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/JPA-Access/target/classes/com/rts/mysql/dao/Result.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/JPA-Access/target/classes/com/rts/mysql/dao/Result.class
--------------------------------------------------------------------------------
/JPA-Access/target/classes/com/rts/mysql/util/DaoUtil.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/JPA-Access/target/classes/com/rts/mysql/util/DaoUtil.class
--------------------------------------------------------------------------------
/JPA-Access/target/classes/com/rts/mysql/util/EMfactory.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/JPA-Access/target/classes/com/rts/mysql/util/EMfactory.class
--------------------------------------------------------------------------------
/JPA-Access/target/classes/org/JPA/Access/App.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/JPA-Access/target/classes/org/JPA/Access/App.class
--------------------------------------------------------------------------------
/JPA-Access/target/test-classes/org/JPA/Access/AppTest.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/JPA-Access/target/test-classes/org/JPA/Access/AppTest.class
--------------------------------------------------------------------------------
/RTS.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/RTS.db
--------------------------------------------------------------------------------
/RTS.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/RTS.log
--------------------------------------------------------------------------------
/RTTM_Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/RTTM_Logo.png
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |

5 |
6 |
Real Time Threat Monitoring Tool V2.0
7 |

8 |
9 |
10 | Monitoring possible threats to your company on the Internet is an impossible task to achieve manually. Hence many threats to the company go unnoticed until they become viral in public, causing monetary/reputation damage. This is where RTTM comes into action.
11 | RTTM (Real Time Threat Monitoring Tool) is a tool developed to scrape all pastie sites, GitHub, Reddit, etc. in real time to identify occurrences of the configured search terms. Upon a match, an email alert is triggered, allowing the company to react to leaked code, tweeted hacks, etc., and harden itself against an attack before the leak goes viral.
12 |
13 | Over the past 2 years the tool has evolved from simple search. Artificial intelligence has been implemented to perform better search based on context. If regex is needed even that is supported. Thus behaviour is close to human and reduces false positives.
14 |
15 | The best part of the tool is that an alert email is sent in less than 60 seconds from the time the threat appears on the internet, allowing a response to happen in real time.
16 |
17 |
18 |
19 | The same tool in a malicious user's hands can be used offensively to get updates on the latest hacks, code leakages, etc.
20 |
21 | List of sites which will be monitored are:
22 |
23 | - Non-Pastie Sites
24 |
25 | - Twitter
26 | - Reddit
27 | - Github
28 |
29 | - Pastie Sites
30 |
31 | - Pastebin.com
32 | - Codepad.org
33 | - Dumpz.org
34 | - Snipplr.com
35 | - Paste.org.ru
36 | - Gist.github.com
37 | - Pastebin.ca
38 | - Kpaste.net
39 | - Slexy.org
40 | - Ideone.com
41 | - Pastebin.fr
42 |
43 |
44 |
45 |
46 |
47 | Architecture:
48 |
49 |
50 | How it works?
51 | Once the tool is started, the engine is kicked off and runs forever. The main input for this engine is the configuration file. Based on the configuration data, the engine probes Twitter/GitHub/Reddit for the matches configured in the configuration file. When a match is found, the Twitter/GitHub/Reddit link is pushed to the SQLite DB and an email alert is triggered.
52 |
53 | In case of pastie sites the logic is different. The reason being they do not support search nor streaming api's. Hence any new pastie made by any user, the link is fetched and pushed to kafka. From kafka any new link added is picked up and searched for matches configured in configuration file. Upon a match is found, the link of pastie site is pushed to sqlite DB and an email alert is triggered.
54 |
55 | Over the past 2 years the tool has evolved from simple search. Artificial intelligence has been implemented to perform better search based on context. If regex is needed even that is supported. Thus behaviour is close to human and reduces false positives.
56 |
57 | Detailed Tool Documentation:
58 | https://real-time-threat-monitoring.readthedocs.io/en/latest/
59 |
60 |
61 | Developers:
62 |
63 | Authors:
64 |
65 | - Naveen Rudrappa
66 |
67 |
68 | Contributors:
69 |
70 | - Sunny Sharma
71 | - Murali Segu
72 |
73 |
74 |
--------------------------------------------------------------------------------
/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/architecture.png
--------------------------------------------------------------------------------
/docs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/.DS_Store
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/build/doctrees/apiKeySetUp.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/apiKeySetUp.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/architecture.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/architecture.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/configuration.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/configuration.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/contents.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/contents.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/contributors.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/contributors.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/debugging.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/debugging.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/environment.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/environment.pickle
--------------------------------------------------------------------------------
/docs/build/doctrees/index.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/index.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/intro.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/intro.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/license.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/license.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/roadmap.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/roadmap.doctree
--------------------------------------------------------------------------------
/docs/build/doctrees/setupGuide.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/setupGuide.doctree
--------------------------------------------------------------------------------
/docs/build/html/.buildinfo:
--------------------------------------------------------------------------------
1 | # Sphinx build info version 1
2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3 | config: b0c4f5b42daa0bbaa7b8887f888115eb
4 | tags: 645f666f9bcd5a90fca523b33c5a78b7
5 |
--------------------------------------------------------------------------------
/docs/build/html/_images/RTTM_Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_images/RTTM_Logo.png
--------------------------------------------------------------------------------
/docs/build/html/_images/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_images/architecture.png
--------------------------------------------------------------------------------
/docs/build/html/_sources/apiKeySetUp.md.txt:
--------------------------------------------------------------------------------
1 | API Key Setup
2 | =============
3 |
4 | Twitter API Key:
5 | ----------------
6 | * Go to https://dev.twitter.com/apps/new and log in, if necessary
7 |
8 | * Enter your Application Name, Description and your website address. You can leave the callback URL empty.
9 |
10 | * Accept the TOS, and solve the CAPTCHA.
11 |
12 | * Submit the form by clicking the Create your Twitter Application
13 |
14 | * Copy the consumer key (API key) and consumer secret from the screen into your application
15 |
16 | Github API Key:
17 | ---------------
18 |
19 | Refer link https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line for detailed information.
20 |
21 | Pastebin.com
22 | -------------
23 | In case of pastebin.com it is not necessary to generate an apiKey, but you need to whitelist your IP. For this you have to pay to get your IP whitelisted.
24 |
25 | Refer https://pastebin.com/doc_scraping_api for detailed information.
26 |
--------------------------------------------------------------------------------
/docs/build/html/_sources/architecture.md.txt:
--------------------------------------------------------------------------------
1 | Architecture
2 | =============
3 |
4 | .. image:: _static/architecture.png
5 | :width: 800
6 | :alt: Alternative text
7 |
8 | The architectural diagram of the tool is as above.
9 |
10 | How it works
11 | -----------------
12 |
13 | Once the tool is started, the engine gets kicked off and runs forever. The main input for this engine is the configuration file. Based on the configuration file data, the engine probes twitter/github/reddit for the matches configured in the configuration file. When a match is found, the twitter/github/reddit link is pushed to the sqlite DB and an email alert is triggered.
14 |
15 | In case of pastie sites the logic is different, the reason being that they support neither search nor streaming APIs. Hence, whenever a new pastie is made by any user, its link is fetched and pushed to kafka. Any new link added to kafka is picked up and searched for the matches configured in the configuration file. When a match is found, the link of the pastie site is pushed to the sqlite DB and an email alert is triggered.
--------------------------------------------------------------------------------
/docs/build/html/_sources/configuration.md.txt:
--------------------------------------------------------------------------------
1 | Configuration
2 | =============
3 |
4 | Before using this tool it is necessary to understand the properties files present in the scrapper_config directory.
5 |
6 | consumer.properties
7 | ------------------------
8 | Holds all the necessary config data needed for the consumer of Kafka (refer to the Apache Kafka guide for more information). The values present here are default options and do not require any changes.
9 |
10 | producer.properties
11 | ------------------------
12 | Holds all the necessary config data needed for the Producer (refer to the Apache Kafka guide for more information). The values present here are default options and do not require any changes.
13 |
14 | email.properties
15 | ------------------------
16 | Holds all the configuration data to send email.
17 |
18 | scanner-configuration.properties
19 | -------------------------------------
20 | This is the core configuration file. Update all the config for enabling search on twitter/github(To get tokens and key refer respective sites).
21 |
22 | For pastie sites and reddit there is no need for any changes in config.
23 |
24 | *Note: However, in all cases make sure to change "searchterms" to the values of your choice to search. If there are multiple search terms then add them separated by commas, like the example data provided in the config file.*
25 |
26 | **Understanding more about scanner-configuration.properties file.**
27 |
28 | For any pastie site configuration is as below:
29 |
30 | *Note: leave the pastie sites configuration as is and just change the search terms as required by the organization. This will be sufficient.*
31 |
32 | * scrapper.(pastie name).profile=(Pastie profile name)
33 |
34 | * scrapper.(pastie name).homeurl=(URL from which pastie ids are extracted)
35 |
36 | * scrapper.(pastie name).regex=(Regex to fetch pastie ids)
37 |
38 | * scrapper.(pastie name).downloadurl=(URL to get information about each pastie)
39 |
40 | * scrapper.(pastie name).searchterms=(Mention terms to be searched, separated by commas)
41 |
42 | * scrapper.(pastie name).timetosleep=(Time for which pastie thread will sleep before fetching pastie ids again)
43 |
44 | For github search configuration is as below:
45 |
46 | * scrapper.github.profile=Github
47 |
48 | * scrapper.github.baseurl=https://api.github.com/search/code?q={searchTerm}&sort=indexed&order=asc
49 |
50 | * scrapper.github.access_token=(Get your own github access token)
51 |
52 | * scrapper.github.searchterms=(Mention terms to be searched, separated by commas)
53 |
54 | * scrapper.github.timetosleep=(Time for which the github thread should sleep before searching again)
55 |
56 |
57 | For reddit, search configuration is as below:
58 | * scrapper.reddit.profile=Reddit
59 |
60 | * scrapper.reddit.baseurl=https://www.reddit.com/search.json?q={searchterm}
61 |
62 | * scrapper.reddit.searchterms=(Mention terms to be searched, separated by commas)
63 |
64 | * scrapper.reddit.timetosleep=(Time for which the reddit thread should sleep before searching again)
65 |
66 |
67 | For Twitter search configuration is as below:
68 | * scrapper.twitter.apikey=test
69 |
70 | * scrapper.twitter.profile=Twitter
71 |
72 | * scrapper.twitter.searchterms=(Mention terms to be searched, separated by commas)
73 |
74 | * scrapper.twitter.consumerKey=(Get your own consumer key)
75 |
76 | * scrapper.twitter.consumerSecret=(Get your own consumerSecret)
77 |
78 | * scrapper.twitter.accessToken=(Get your own accessToken)
79 |
80 | * scrapper.twitter.accessTokenSecret=(Get your own accessTokenSecret)
81 |
--------------------------------------------------------------------------------
/docs/build/html/_sources/contents.rst.txt:
--------------------------------------------------------------------------------
1 | .. Real Time Threat Monitoring Tool documentation master file, created by
2 | sphinx-quickstart on Sun Nov 10 06:40:56 2019.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to Real Time Threat Monitoring Tool's documentation!
7 | ============================================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Contents:
12 |
13 | intro
14 | architecture
15 | configuration
16 | apiKeySetUp
17 | setupGuide
18 | debugging
19 | roadmap
20 | contributors
21 | license
22 |
--------------------------------------------------------------------------------
/docs/build/html/_sources/contributors.md.txt:
--------------------------------------------------------------------------------
1 | Contributors
2 | ============
3 |
4 | Well, let's accept the fact that nothing goes well without contributors. Here is the list of people who have helped (@rttmscrapper) grow in its first phase.
5 |
6 | Author:
7 | ------------------------
8 |
9 | Folks who took out time from busy schedule and got their hands dirty with the code:
10 |
11 | * Naveen Rudrappa
12 |
13 | Contributors:
14 | ------------------------
15 | * Murali Krishna Segu
16 |
17 | * Sunny Sharma
18 |
19 | Mentors:
20 | -------------
21 |
22 | Chaps who were generous enough to give feedback and suggest changes:
23 |
24 | * Murali Krishna Segu
25 |
--------------------------------------------------------------------------------
/docs/build/html/_sources/debugging.md.txt:
--------------------------------------------------------------------------------
1 | Debugging
2 | =========
3 |
4 | Follow the below steps in case you find issues while working with the tool.
5 |
6 | * Whenever you face an issue with the tool, look into the logs to see what is displayed. Sometimes the apiKey will have expired and you may have to regenerate it.
7 |
8 | * Run *clean.sh* in the script folder.
9 |
10 | * Now run *intialize.sh*
11 |
12 | * Now run *java -jar scraptool/target/scraptool-1.0-SNAPSHOT-standalone.jar -t test -c /home/n0r00ij/RTS/scrapper_config/*
--------------------------------------------------------------------------------
/docs/build/html/_sources/index.rst.txt:
--------------------------------------------------------------------------------
1 | .. Real Time Threat Monitoring Tool documentation master file, created by
2 | sphinx-quickstart on Sun Nov 10 06:40:56 2019.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to Real Time Threat Monitoring Tool's documentation!
7 | ============================================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Contents:
12 |
13 | intro
14 | architecture
15 | configuration
16 | apiKeySetUp
17 | setupGuide
18 | debugging
19 | roadmap
20 | contributors
21 | license
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/build/html/_sources/intro.md.txt:
--------------------------------------------------------------------------------
1 | Intro
2 | =====
3 |
4 | .. image:: _static/RTTM_Logo.png
5 | :scale: 30 %
6 | :align: right
7 | :class: intro-logo
8 |
9 | Why this tool?
10 | ------------------
11 | Monitoring possible threats to your company on the Internet is an impossible task to achieve manually. Hence many threats to the company go unnoticed until they become viral in public, thus causing monetary/reputation damage. This is where RTTM comes into action. RTTM (Real Time Threat Monitoring Tool) is a tool developed to scrape all pasties, github, reddit, etc. in real time to identify occurrences of the search terms configured. Upon a match, an email will be triggered, thus allowing the company to react in case of leakage of code, any hacks tweeted, etc., and harden themselves against an attack before it goes viral.
12 |
13 | Over the past 2 years the tool has evolved from simple search. Artificial intelligence has been implemented to perform better search based on context. If regex is needed even that is supported. Thus behaviour is close to human and reduces false positives.
14 |
15 | The best part of the tool is that an alert will be sent to email in less than 60 seconds from the time the threat has made it to the internet, thus allowing a response to happen in real time.
16 |
17 | The same tool in malicious user hands can be used offensively to get update on any latest hacks, code leakage etc..
18 |
19 | List of sites which will be monitored are:
20 |
21 | Non-Pastie Sites:
22 | ------------------------
23 |
24 | * Twitter
25 |
26 | * Reddit
27 |
28 | * Github
29 |
30 |
31 | Pastie Sites
32 | ------------------------
33 |
34 | * Pastebin.com
35 |
36 | * Codepad.org
37 |
38 | * Dumpz.org
39 |
40 | * Snipplr.com
41 |
42 | * Paste.org.ru
43 |
44 | * Gist.github.com
45 |
46 | * Pastebin.ca
47 |
48 | * Kpaste.net
49 |
50 | * Slexy.org
51 |
52 | * Ideone.com
53 |
54 | * Pastebin.fr
55 |
56 |
57 |
58 |
--------------------------------------------------------------------------------
/docs/build/html/_sources/license.md.txt:
--------------------------------------------------------------------------------
1 | License
2 | =======
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
--------------------------------------------------------------------------------
/docs/build/html/_sources/roadmap.md.txt:
--------------------------------------------------------------------------------
1 | RoadMap
2 | =======
3 |
4 | We are having following ideas to include in upcoming versions:
5 |
6 | * Enhance artificial intelligence for search
7 |
8 | * Implement worker nodes for searching
9 |
10 | * Support google dork,linkedin,fark web etc..
11 |
12 | * Support other DB's like postgress/oracle/Mongo etc..
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/docs/build/html/_sources/setupGuide.md.txt:
--------------------------------------------------------------------------------
1 | SetupGuide
2 | ==========
3 |
4 | Install via Docker
5 | ---------------------------------------------------
6 |
7 | * Install docker in your system
8 |
9 | * Download Dockerfile from https://github.com/NaveenRudra/RTTM
10 |
11 | * Change directory to RTTM
12 |
13 | * execute *docker build .*
14 |
15 | * Run *docker exec -it <container-id> /bin/bash*
16 |
17 | * Now once in docker navigate to */opt/RTTM/script*
18 |
19 | * Run the *intialize.sh* script. This will boot the mysql server and start kafka.
20 |
21 | * Run *db_setup.sh*; this will create the needed tables.
22 |
23 | * Now from */opt/RTTM* run command *java -jar scraptool/target/scraptool-1.0-SNAPSHOT-standalone.jar -t test -c /home/n0r00ij/RTS/scrapper_config/*
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/docs/build/html/_static/RTTM_Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/RTTM_Logo.png
--------------------------------------------------------------------------------
/docs/build/html/_static/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/architecture.png
--------------------------------------------------------------------------------
/docs/build/html/_static/css/badge_only.css:
--------------------------------------------------------------------------------
1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../fonts/fontawesome-webfont.eot");src:url("../fonts/fontawesome-webfont.eot?#iefix") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff") format("woff"),url("../fonts/fontawesome-webfont.ttf") format("truetype"),url("../fonts/fontawesome-webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book{float:left}.rst-versions 
.rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}
2 |
--------------------------------------------------------------------------------
/docs/build/html/_static/custom.css:
--------------------------------------------------------------------------------
1 | /* This file intentionally left blank. */
2 |
--------------------------------------------------------------------------------
/docs/build/html/_static/documentation_options.js:
--------------------------------------------------------------------------------
1 | var DOCUMENTATION_OPTIONS = {
2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
3 | VERSION: '1.0',
4 | LANGUAGE: 'None',
5 | COLLAPSE_INDEX: false,
6 | FILE_SUFFIX: '.html',
7 | HAS_SOURCE: true,
8 | SOURCELINK_SUFFIX: '.txt',
9 | NAVIGATION_WITH_KEYS: false
10 | };
--------------------------------------------------------------------------------
/docs/build/html/_static/file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/file.png
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Inconsolata-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Inconsolata-Bold.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Inconsolata-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Inconsolata-Regular.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Inconsolata.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Inconsolata.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato-Bold.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato-Regular.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bold.eot
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bold.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bold.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bold.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-bolditalic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bolditalic.eot
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-italic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-italic.eot
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-italic.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-italic.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-italic.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-regular.eot
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-regular.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-regular.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/Lato/lato-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-regular.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/RobotoSlab-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab-Bold.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/RobotoSlab-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab-Regular.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/docs/build/html/_static/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/fontawesome-webfont.woff2
--------------------------------------------------------------------------------
/docs/build/html/_static/js/theme.js:
--------------------------------------------------------------------------------
1 | /* sphinx_rtd_theme version 0.4.3 | MIT license */
2 | /* Built 20190212 16:02 */
3 | require=function r(s,a,l){function c(e,n){if(!a[e]){if(!s[e]){var i="function"==typeof require&&require;if(!n&&i)return i(e,!0);if(u)return u(e,!0);var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}var o=a[e]={exports:{}};s[e][0].call(o.exports,function(n){return c(s[e][1][n]||n)},o,o.exports,r,s,a,l)}return a[e].exports}for(var u="function"==typeof require&&require,n=0;n"),i("table.docutils.footnote").wrap(""),i("table.docutils.citation").wrap(""),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var e=i(this);expand=i(''),expand.on("click",function(n){return t.toggleCurrent(e),n.stopPropagation(),!1}),e.prepend(expand)})},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),i=e.find('[href="'+n+'"]');if(0===i.length){var t=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(i=e.find('[href="#'+t.attr("id")+'"]')).length&&(i=e.find('[href="#"]'))}0this.docHeight||(this.navBar.scrollTop(i),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:e.exports.ThemeNav,StickyNav:e.exports.ThemeNav}),function(){for(var r=0,n=["ms","moz","webkit","o"],e=0;eNUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/source/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/source/.DS_Store
--------------------------------------------------------------------------------
/docs/source/_static/RTTM_Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/source/_static/RTTM_Logo.png
--------------------------------------------------------------------------------
/docs/source/_static/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/source/_static/architecture.png
--------------------------------------------------------------------------------
/docs/source/apiKeySetUp.md:
--------------------------------------------------------------------------------
1 | API Key Setup
2 | =============
3 |
4 | Twitter API Key:
5 | ----------------
6 | * Go to https://dev.twitter.com/apps/new and log in, if necessary
7 |
8 | * Enter your Application Name, Description and your website address. You can leave the callback URL empty.
9 |
10 | * Accept the TOS, and solve the CAPTCHA.
11 |
12 | * Submit the form by clicking the Create your Twitter Application
13 |
14 | * Copy the consumer key (API key) and consumer secret from the screen into your application
15 |
16 | Github API Key:
17 | ---------------
18 |
19 | Refer link https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line for detailed information.
20 |
21 | Pastebin.com
22 | -------------
23 | In case of pastebin.com it is not necessary to generate an apiKey, but you need to whitelist your IP. For this you have to pay to get your IP whitelisted.
24 |
25 | Refer https://pastebin.com/doc_scraping_api for detailed information.
26 |
--------------------------------------------------------------------------------
/docs/source/architecture.md:
--------------------------------------------------------------------------------
1 | Architecture
2 | =============
3 |
4 | .. image:: _static/architecture.png
5 | :width: 800
6 | :alt: Alternative text
7 |
8 | The architectural diagram of the tool is as above.
9 |
10 | How it works
11 | -----------------
12 |
13 | Once the tool is started, the engine gets kicked off and runs forever. The main input for this engine is the configuration file. Based on the configuration file data, the engine goes ahead and probes twitter/github/reddit for matches configured in the configuration file. When a match is found, the link of twitter/github/reddit is pushed to the sqlite DB and an email alert is triggered.
14 |
15 | In case of pastie sites the logic is different, the reason being that they support neither search nor streaming APIs. Hence whenever a new pastie is made by any user, its link is fetched and pushed to kafka. From kafka any newly added link is picked up and searched for matches configured in the configuration file. When a match is found, the link of the pastie site is pushed to the sqlite DB and an email alert is triggered.
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 |
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = 'Real Time Threat Monitoring Tool'
21 | copyright = '2019, Naveen Rudrappa'
22 | author = 'Naveen Rudrappa'
23 |
24 | # The full version, including alpha/beta/rc tags
25 | release = '1.0'
26 |
27 |
28 | # -- General configuration ---------------------------------------------------
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 | ]
35 |
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ['_templates']
38 |
39 | # List of patterns, relative to source directory, that match files and
40 | # directories to ignore when looking for source files.
41 | # This pattern also affects html_static_path and html_extra_path.
42 | exclude_patterns = []
43 |
44 | source_suffix=['.rst','.md']
45 |
46 |
47 | # -- Options for HTML output -------------------------------------------------
48 |
49 | # The theme to use for HTML and HTML Help pages. See the documentation for
50 | # a list of builtin themes.
51 | #
52 | html_theme = 'sphinx_rtd_theme'
53 |
54 | # Add any paths that contain custom static files (such as style sheets) here,
55 | # relative to this directory. They are copied after the builtin static files,
56 | # so a file named "default.css" will overwrite the builtin "default.css".
57 | html_static_path = ['_static']
--------------------------------------------------------------------------------
/docs/source/configuration.md:
--------------------------------------------------------------------------------
1 | Configuration
2 | =============
3 |
4 | Before using this tool it is necessary to understand the properties files present in the scrapper_config directory.
5 |
6 | consumer.properties
7 | ------------------------
8 | Holds all the necessary config data needed for the consumer of Kafka (refer to the Apache Kafka guide for more information). The values present here are default options and do not require any changes.
9 |
10 | producer.properties
11 | ------------------------
12 | Holds all the necessary config data needed for the Producer (refer to the Apache Kafka guide for more information). The values present here are default options and do not require any changes.
13 |
14 | email.properties
15 | ------------------------
16 | Holds all the configuration data to send email.
17 |
18 | scanner-configuration.properties
19 | -------------------------------------
20 | This is the core configuration file. Update all the config for enabling search on twitter/github(To get tokens and key refer respective sites).
21 |
22 | For pastie sites and reddit there is no need for any changes in config.
23 |
24 | *Note: However, in all cases make sure to change "searchterms" to the values of your choice to search. If there are multiple search terms then add them separated by commas, like the example data provided in the config file.*
25 |
26 | **Understanding more about scanner-configuration.properties file.**
27 |
28 | For any pastie site configuration is as below:
29 |
30 | *Note: Leave the pastie sites configuration as is and just change the search terms as required by the organization. This will do good.*
31 |
32 | * scrapper.(pastie name).profile=(Pastie profile name)
33 |
34 | * scrapper.(pastie name).homeurl=(URL from which pastie ids are extracted)
35 |
36 | * scrapper.(pastie name).regex=(Regex to fetch pastie ids)
37 |
38 | * scrapper.(pastie name).downloadurl= (URL to get information about each pastie)
39 |
40 | * scrapper.(pastie name).searchterms=(Mention terms to be searched separated by commas)
41 |
42 | * scrapper.(pastie name).timetosleep=(Time for which pastie thread will sleep before fetching pastie ids again)
43 |
44 | For github search configuration is as below:
45 |
46 | * scrapper.github.profile=Github
47 |
48 | * scrapper.github.baseurl=https://api.github.com/search/code?q={searchTerm}&sort=indexed&order=asc
49 |
50 | * scrapper.github.access_token=(Get your own github access token)
51 |
52 | * scrapper.github.searchterms=(Mention terms to be searched separated by commas)
53 |
54 | * scrapper.github.timetosleep=(Time for which the github thread should sleep before searching again)
55 |
56 |
57 | For reddit search configuration is as below:
58 | * scrapper.reddit.profile=Reddit
59 |
60 | * scrapper.reddit.baseurl=https://www.reddit.com/search.json?q={searchterm}
61 |
62 | * scrapper.reddit.searchterms=(Mention terms to be searched separated by commas)
63 |
64 | * scrapper.reddit.timetosleep=(Time for which the reddit thread should sleep before searching again)
65 |
66 |
67 | For Twitter search configuration is as below:
68 | * scrapper.twitter.apikey=test
69 |
70 | * scrapper.twitter.profile=Twitter
71 |
72 | * scrapper.twitter.searchterms=(Mention terms to be searched separated by commas)
73 |
74 | * scrapper.twitter.consumerKey=(Get your own consumer key)
75 |
76 | * scrapper.twitter.consumerSecret=(Get your own consumerSecret)
77 |
78 | * scrapper.twitter.accessToken=(Get your own accessToken)
79 |
80 | * scrapper.twitter.accessTokenSecret=(Get your own accessTokenSecret)
81 |
--------------------------------------------------------------------------------
/docs/source/contents.rst:
--------------------------------------------------------------------------------
1 | .. Real Time Threat Monitoring Tool documentation master file, created by
2 | sphinx-quickstart on Sun Nov 10 06:40:56 2019.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to Real Time Threat Monitoring Tool's documentation!
7 | ============================================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Contents:
12 |
13 | intro
14 | architecture
15 | configuration
16 | apiKeySetUp
17 | setupGuide
18 | debugging
19 | roadmap
20 | contributors
21 | license
22 |
--------------------------------------------------------------------------------
/docs/source/contributors.md:
--------------------------------------------------------------------------------
1 | Contributors
2 | ============
3 |
4 | Well, let's accept the fact that nothing goes well without contributors. Here is the list of people who have helped (`@rttmscrapper `_) grow in its first phase.
5 |
6 | Author:
7 | ------------------------
8 |
9 | Folks who took out time from busy schedule and got their hands dirty with the code:
10 |
11 | * Naveen Rudrappa
12 |
13 | Contributors:
14 | ------------------------
15 | * Murali Krishna Segu
16 |
17 | * Sunny Sharma
18 |
19 | Mentors:
20 | -------------
21 |
22 | Chaps who were generous enough to give feedback and suggest changes:
23 |
24 | * Murali Krishna Segu
25 |
--------------------------------------------------------------------------------
/docs/source/debugging.md:
--------------------------------------------------------------------------------
1 | Debugging
2 | =========
3 |
4 | Follow the below steps in case you find issues while working with the tool.
5 |
6 | * Whenever you face an issue with the tool, look into the logs to see what is displayed. Sometimes the apiKey will have expired and you may have to regenerate it.
7 |
8 | * Run *clean.sh* in the script folder.
9 |
10 | * Now run *intialize.sh*
11 |
12 | * Now run *java -jar scraptool/target/scraptool-1.0-SNAPSHOT-standalone.jar -t test -c /home/n0r00ij/RTS/scrapper_config/*
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. Real Time Threat Monitoring Tool documentation master file, created by
2 | sphinx-quickstart on Sun Nov 10 06:40:56 2019.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to Real Time Threat Monitoring Tool's documentation!
7 | ============================================================
8 |
9 | .. toctree::
10 | :maxdepth: 2
11 | :caption: Contents:
12 |
13 | intro
14 | architecture
15 | configuration
16 | apiKeySetUp
17 | setupGuide
18 | debugging
19 | roadmap
20 | contributors
21 | license
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/source/intro.md:
--------------------------------------------------------------------------------
1 | Intro
2 | =====
3 |
4 | .. image:: _static/RTTM_Logo.png
5 | :scale: 30 %
6 | :align: right
7 | :class: intro-logo
8 |
9 | Why this tool?
10 | ------------------
11 | Monitoring possible threats to your company on the Internet is an impossible task to achieve manually. Hence many threats to the company go unnoticed until they become viral in public, causing monetary/reputation damage. This is where RTTM comes into action. RTTM (Real Time Threat Monitoring Tool) is a tool developed to scrape all pasties, github, reddit, etc. in real time to identify occurrences of the configured search terms. Upon a match an email will be triggered, thus allowing the company to react in case of code leakage, any hacks tweeted, etc., and harden itself against an attack before it goes viral.
12 |
13 | Over the past 2 years the tool has evolved from simple search. Artificial intelligence has been implemented to perform better search based on context. If regex is needed even that is supported. Thus behaviour is close to human and reduces false positives.
14 |
15 | The best part of the tool is that an alert will be sent to email in less than 60 seconds from the time the threat has made it to the internet, thus allowing a response to happen in real time.
16 |
17 | The same tool in malicious user hands can be used offensively to get update on any latest hacks, code leakage etc..
18 |
19 | List of sites which will be monitored are:
20 |
21 | Non-Pastie Sites:
22 | ------------------------
23 |
24 | * Twitter
25 |
26 | * Reddit
27 |
28 | * Github
29 |
30 |
31 | Pastie Sites
32 | ------------------------
33 |
34 | * Pastebin.com
35 |
36 | * Codepad.org
37 |
38 | * Dumpz.org
39 |
40 | * Snipplr.com
41 |
42 | * Paste.org.ru
43 |
44 | * Gist.github.com
45 |
46 | * Pastebin.ca
47 |
48 | * Kpaste.net
49 |
50 | * Slexy.org
51 |
52 | * Ideone.com
53 |
54 | * Pastebin.fr
55 |
56 |
57 |
58 |
--------------------------------------------------------------------------------
/docs/source/license.md:
--------------------------------------------------------------------------------
1 | License
2 | =======
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
--------------------------------------------------------------------------------
/docs/source/roadmap.md:
--------------------------------------------------------------------------------
1 | RoadMap
2 | =======
3 |
4 | We are having following ideas to include in upcoming versions:
5 |
6 | * Enhance artificial intelligence for search
7 |
8 | * Implement worker nodes for searching
9 |
10 | * Support google dork,linkedin,fark web etc..
11 |
12 | * Support other DB's like postgress/oracle/Mongo etc..
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/docs/source/setupGuide.md:
--------------------------------------------------------------------------------
1 | SetupGuide
2 | ==========
3 |
4 | Install via Docker
5 | ---------------------------------------------------
6 |
7 | * Install docker in your system
8 |
9 | * Download Dockerfile from https://github.com/NaveenRudra/RTTM
10 |
11 | * Change directory to RTTM
12 |
13 | * execute *docker build .*
14 |
15 | * Run *docker exec -it /bin/bash*
16 |
17 | * Now once in docker navigate to */opt/RTTM/script*
18 |
19 | * Run the *intialize.sh* script. This will boot the mysql server and start kafka.
20 |
21 | * Run *db_setup.sh*; this will create the needed table.
22 |
23 | * Now from */opt/RTTM* run command *java -jar scraptool/target/scraptool-1.0-SNAPSHOT-standalone.jar -t test -c /home/n0r00ij/RTS/scrapper_config/*
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/kafka-parser/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/kafka-parser/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/kafka-parser/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | kafka-parser
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.jdt.core.javanature
21 | org.eclipse.m2e.core.maven2Nature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/kafka-parser/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/main/resources=UTF-8
4 | encoding//src/test/java=UTF-8
5 | encoding/=UTF-8
6 |
--------------------------------------------------------------------------------
/kafka-parser/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/kafka-parser/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
6 | 4.0.0
7 |
8 | com.rts.scrap
9 | rts
10 | 1.0-SNAPSHOT
11 |
12 | com.rts.scrap
13 | kafka-parser
14 | 1.0-SNAPSHOT
15 | kafka-parser
16 | http://maven.apache.org
17 |
18 | UTF-8
19 |
20 |
21 |
22 |
23 | net.amygdalum
24 | stringsearchalgorithms
25 | 0.3.4
26 |
27 |
28 | org.freemarker
29 | freemarker
30 | 2.3.20
31 |
32 |
33 | javax.mail
34 | mail
35 | 1.4
36 |
37 |
38 | com.fasterxml.jackson.core
39 | jackson-databind
40 | 2.9.0
41 |
42 |
43 | com.fasterxml.jackson.core
44 | jackson-annotations
45 | 2.9.0
46 |
47 |
48 | com.fasterxml.jackson.core
49 | jackson-core
50 | 2.9.0
51 |
52 |
53 |
54 | org.apache.commons
55 | commons-io
56 | 1.3.2
57 |
58 |
59 | org.apache.commons
60 | commons-lang3
61 | 3.1
62 |
63 |
64 | org.apache.kafka
65 | kafka-clients
66 | 0.9.0.0
67 |
68 |
69 | com.google.guava
70 | guava
71 | 18.0
72 |
73 |
74 | com.rts.scrap
75 | sqlite-dataaccess
76 | 1.0-SNAPSHOT
77 |
78 |
79 | org.hdrhistogram
80 | HdrHistogram
81 | 2.1.8
82 |
83 |
84 | junit
85 | junit
86 | 4.9
87 | test
88 |
89 |
90 |
91 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/base/NotificationConsumerGroup.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.base;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | public final class NotificationConsumerGroup {
9 | private final int numberOfConsumers;
10 |
11 | private final String topic;
12 | private List consumers;
13 |
14 |
15 | public NotificationConsumerGroup(int numberOfConsumers,String topic,File configDirectoryfile) throws IOException {
16 | this.topic=topic;
17 | this.numberOfConsumers = numberOfConsumers;
18 | consumers = new ArrayList<>();
19 | for (int i = 0; i < this.numberOfConsumers; i++) {
20 | NotificationConsumerThread ncThread =
21 | new NotificationConsumerThread(this.topic,configDirectoryfile);
22 | consumers.add(ncThread);
23 | }
24 | }
25 |
26 | public void execute() {
27 | for (NotificationConsumerThread ncThread : consumers) {
28 | Thread t = new Thread(ncThread);
29 | t.start();
30 | }
31 | }
32 |
33 |
34 |
35 | public int getNumberOfConsumers() {
36 | return numberOfConsumers;
37 | }
38 |
39 |
40 |
41 |
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/base/NotificationConsumerThread.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.base;
2 |
3 | import java.io.ByteArrayInputStream;
4 | import java.io.File;
5 | import java.io.IOException;
6 | import java.nio.charset.Charset;
7 | import java.nio.file.Files;
8 | import java.nio.file.Paths;
9 | import java.util.ArrayList;
10 | import java.util.Arrays;
11 | import java.util.List;
12 | import java.util.Properties;
13 | import java.util.Random;
14 | import org.apache.kafka.clients.consumer.ConsumerRecord;
15 | import org.apache.kafka.clients.consumer.ConsumerRecords;
16 | import org.apache.kafka.clients.consumer.KafkaConsumer;
17 | import org.kafkaparser.utilities.ConfigData;
18 | import org.kafkaparser.utilities.PastieParseAndSearch;
19 | import org.kafkaparser.pojo.Data;
20 |
21 | public class NotificationConsumerThread implements Runnable {
22 |
23 | private final KafkaConsumer consumer;
24 | private final String topic;
25 | private static List userAgents = new ArrayList<>();
26 |
27 |
28 | public static void initialize(String configDirectory)
29 | {
30 |
31 | try {
32 | //userAgents = Files.readAllLines(Paths.get(ConfigData.configDirectory,ConfigData.useragents_listPropertiesFileName),
33 | // Charset.defaultCharset());
34 | //static block is initilized before initilaizing variables is causing issue. Danger comment
35 | ConfigData.userAgents = Files.readAllLines(Paths.get(ConfigData.configDirectory,ConfigData.useragents_listPropertiesFileName),
36 | Charset.defaultCharset());
37 | Properties prop = getConfig(new File(configDirectory),"global.properties");
38 | ConfigData.pythonPath = prop.getProperty("pythonpath");
39 | ConfigData.trufflehogPath = prop.getProperty("trufflehogpath");
40 | //ConfigData.pythonPath = prop.getProperty("");
41 |
42 | } catch (IOException e) {
43 | // TODO Auto-generated catch block
44 | e.printStackTrace();
45 | }
46 |
47 |
48 | }
49 |
50 | public NotificationConsumerThread(String topic,File configDirectoryfile) throws IOException {
51 | Properties prop = getConfig(configDirectoryfile,"consumer.properties");
52 | this.consumer = new KafkaConsumer<>(prop);
53 | this.topic = topic;
54 | this.consumer.subscribe(Arrays.asList(this.topic));
55 |
56 | initialize(configDirectoryfile.getAbsolutePath());
57 |
58 |
59 |
60 | }
61 |
62 | private static Properties getConfig(File configDirectoryfile,String propFileName) throws IOException {
63 | Properties properties = new Properties();
64 | properties.load(new ByteArrayInputStream(Files.readAllBytes(new File(configDirectoryfile, propFileName).toPath())));
65 | if (properties.getProperty("group.id") == null) {
66 | properties.setProperty("group.id", "group-" + new Random().nextInt(100000));
67 | }
68 | return properties;
69 | }
70 |
71 | @Override
72 | public void run() {
73 | try {
74 | while (true)
75 | {
76 | ConsumerRecords records = consumer.poll(100);
77 | for (ConsumerRecord record : records)
78 | {
79 | /**System.out.println("Receive message: " + record.value() + ", Partition: "
80 | + record.partition() + ", Offset: " + record.offset() + ", by ThreadID: "
81 | + Thread.currentThread().getId());**/
82 | //System.out.println("Message recieved !!*********************************************************"+record.value().getUrl());
83 | //Make sure to enable below code for sending resposne and confirming if chanegs has been done or not
84 | Thread.sleep(200);
85 | PastieParseAndSearch.searchEachid(record.value());
86 | }
87 | }
88 | }
89 | catch (Exception e) {
90 | e.printStackTrace();
91 | }
92 | }
93 |
94 |
95 |
96 | }
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/base/Producer.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.base;
2 |
3 | import java.io.ByteArrayInputStream;
4 | import java.io.File;
5 | import java.io.IOException;
6 | import java.nio.file.Files;
7 | import java.util.Properties;
8 | import org.apache.kafka.clients.producer.Callback;
9 | import org.apache.kafka.clients.producer.KafkaProducer;
10 | import org.apache.kafka.clients.producer.ProducerRecord;
11 | import org.apache.kafka.clients.producer.RecordMetadata;
12 | import org.kafkaparser.pojo.Data;
13 |
14 | public class Producer {
15 |
16 | private static KafkaProducer producer;
17 | public static void initialize(File configDirectory) throws IOException
18 | {
19 |
20 | Properties properties = new Properties();
21 | properties.load(new ByteArrayInputStream(Files.readAllBytes(new File(configDirectory, "producer.properties").toPath())));
22 | producer = new KafkaProducer<>(properties);
23 | }
24 |
25 | public static void send(Data data,String topic)
26 | {
27 | //do parsing of the urls from regex here
28 | try {
29 |
30 | producer.send(new ProducerRecord(topic, data), new Callback() {
31 | public void onCompletion(RecordMetadata metadata, Exception e) {
32 | if (e != null) {
33 | e.printStackTrace();
34 | }
35 | // System.out.println("Sent: Partition: " + metadata.partition() + ", Offset: "
36 | // + metadata.offset());
37 | }
38 | });
39 | } catch (Exception e) {
40 | e.printStackTrace();
41 | }
42 |
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/deseralize/DataDeserializer.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.deseralize;
2 |
3 | import java.util.Map;
4 |
5 | import com.fasterxml.jackson.databind.ObjectMapper;
6 | import org.kafkaparser.pojo.Data;
7 |
8 | @SuppressWarnings("unchecked")
9 | public class DataDeserializer implements org.apache.kafka.common.serialization.Deserializer{
10 |
11 | @Override
12 | public T deserialize(String var1, byte[] arg1) {
13 | ObjectMapper mapper = new ObjectMapper();
14 | Data data=null;
15 |
16 | try {
17 | data = mapper.readValue(arg1, Data.class);
18 | } catch (Exception e) {
19 |
20 | e.printStackTrace();
21 | }
22 | return (T) data;
23 | }
24 |
25 | @Override
26 | public void close() {
27 | // TODO Auto-generated method stub
28 |
29 | }
30 |
31 | @Override
32 | public void configure(@SuppressWarnings("rawtypes") Map arg0, boolean arg1) {
33 | // TODO Auto-generated method stub
34 |
35 | }
36 |
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/pojo/Data.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.pojo;
2 |
3 | import java.util.ArrayList;
4 |
5 | import com.fasterxml.jackson.annotation.JsonAutoDetect;
6 | import com.fasterxml.jackson.annotation.JsonCreator;
7 | import com.fasterxml.jackson.annotation.JsonProperty;
8 |
9 | @JsonAutoDetect
10 | public class Data {
11 |
12 | private ArrayList searchTerms;
13 | private String url;
14 |
15 | private String botName="RTS";
16 | private String trufflehogregex="true";
17 | private String trufflehogentropy="false";
18 |
19 |
20 | public String getTrufflehogregex() {
21 | return trufflehogregex;
22 | }
23 | public void setTrufflehogregex(String trufflehogregex) {
24 | this.trufflehogregex = trufflehogregex;
25 | }
26 | public String getTrufflehogentropy() {
27 | return trufflehogentropy;
28 | }
29 | public void setTrufflehogentropy(String trufflehogentropy) {
30 | this.trufflehogentropy = trufflehogentropy;
31 | }
32 |
33 | public String getBotName() {
34 | return botName;
35 | }
36 | public void setBotName(String botName) {
37 | this.botName = botName;
38 | }
39 | public ArrayList getSearchTerms() {
40 | return searchTerms;
41 | }
42 | public void setSearchTerms(ArrayList searchTerms) {
43 | this.searchTerms = searchTerms;
44 | }
45 |
46 | @JsonCreator
47 | public Data(@JsonProperty("url")String url,@JsonProperty("searchTerms")ArrayList searchTerms,@JsonProperty("botName") String botName,
48 | @JsonProperty("trufflehogregex") String trufflehogregex,@JsonProperty("trufflehogentropy") String trufflehogentropy)
49 | {
50 | this.url=url;
51 | this.searchTerms=searchTerms;
52 | this.botName = botName;
53 | this.trufflehogentropy=trufflehogentropy;
54 | this.trufflehogregex=trufflehogregex;
55 |
56 | }
57 | public String getUrl() {
58 | return url;
59 | }
60 | public void setUrl(String url) {
61 | this.url = url;
62 | }
63 |
64 |
65 |
66 |
67 |
68 | @Override public String toString()
69 | {
70 | return "data is "+url+"cool man";
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/serialize/DataSerializer.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.serialize;
2 |
3 | import java.util.Map;
4 |
5 | import com.fasterxml.jackson.databind.ObjectMapper;
6 |
7 | public class DataSerializer implements org.apache.kafka.common.serialization.Serializer{
8 |
9 | @Override
10 | public void close() {
11 | // TODO Auto-generated method stub
12 |
13 | }
14 |
15 | @Override
16 | public void configure(@SuppressWarnings("rawtypes") Map arg0, boolean arg1) {
17 | // TODO Auto-generated method stub
18 |
19 | }
20 |
21 | @Override
22 | public byte[] serialize(String arg0, Object arg1) {
23 | byte[] retVal = null;
24 | ObjectMapper objectMapper = new ObjectMapper();
25 | try {
26 | retVal = objectMapper.writeValueAsString(arg1).getBytes();
27 | } catch (Exception e) {
28 | e.printStackTrace();
29 | }
30 | return retVal;
31 | }
32 |
33 |
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/utilities/ConfigData.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.utilities;
2 |
3 | import java.util.List;
4 |
/**
 * Global, mutable configuration holder shared across the kafka-parser
 * utilities. Values are plain public statics read and written directly by
 * other classes; nothing here is thread-safe by construction.
 */
public class ConfigData {

	// Well-known property/resource file names looked up inside configDirectory.
	public static String consumerPropertiesFileName = "consumer.properties";
	public static String emailPropertiesFileName = "email.properties";
	public static String producerPropertiesFileName = "producer.properties";
	public static String proxyPropertiesFileName = "proxy.properties";
	public static String scanner_configurationPropertiesFileName = "scanner-configuration.properties";
	public static String useragents_listPropertiesFileName = "useragents-list.txt";
	// Absolute path of the configuration directory; set during startup elsewhere.
	public static String configDirectory =null;
	// Kafka topic name; set during startup elsewhere.
	public static String topicName=null;
	// External tool locations used when invoking truffleHog.
	public static String pythonPath="";
	public static String trufflehogPath="";
	// User-Agent strings rotated by HttpUtilities — presumably loaded from
	// useragents-list.txt; confirm against the initializing code.
	public static List userAgents;
}
19 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/utilities/ConfigParams.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.utilities;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.util.Properties;
6 |
7 | import com.google.common.io.Resources;
8 |
/**
 * Holder for HTTP(S) proxy settings. The initializer is currently a no-op:
 * its body is entirely commented out, so {@code proxy_ip}/{@code proxy_port}
 * stay null unless set elsewhere.
 */
public class ConfigParams {

	// Proxy endpoint; only populated by the (currently disabled) code below.
	public static String proxy_ip;
	public static String proxy_port;

	// NOTE: the "initialzie" typo in the name is load-bearing — callers such
	// as PastieParseAndSearch's static block reference it by this spelling.
	public static void initialzie() throws IOException
	{
		// Proxy setup is intentionally disabled. The commented block shows how
		// config.props would be read and JVM-wide proxy properties applied.
		/** try (InputStream props = Resources.getResource("config.props").openStream()) {
	Properties properties = new Properties();
	properties.load(props);
	proxy_ip=properties.getProperty("proxy_ip");
	proxy_port=properties.getProperty("proxy_port");
	System.setProperty("http.proxySet", "true");
	System.setProperty("http.proxyHost",proxy_ip) ;
	System.setProperty("http.proxyPort", proxy_port) ;
	System.setProperty("https.proxySet", "true");
	System.setProperty("https.proxyHost",proxy_ip) ;
	System.setProperty("https.proxyPort", proxy_port) ;
	}**/
	}

}
33 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/utilities/DbUtil.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.utilities;
2 |
3 | import java.text.DateFormat;
4 | import java.text.SimpleDateFormat;
5 | import java.util.Date;
6 | import java.util.Set;
7 |
8 | import javax.persistence.TypedQuery;
9 |
10 | import org.kafkaparser.pojo.Data;
11 | import org.sqlite.dataaccess.entity.Result;
12 | import org.sqlite.dataaccess.entity.SearchItem;
13 | import org.sqlite.dataaccess.util.DaoUtil;
14 | import org.sqlite.dataaccess.util.EMfactory;
15 |
16 | public class DbUtil {
17 |
18 | private static DateFormat df = new SimpleDateFormat("dd/MM/yy HH:mm:ss");
19 | private static Date dateobj = new Date();
20 | private static String FUTURE_IMPLEMENTATION = "This is for future implementation";
21 |
22 | private static final SearchItem getSearchItem(final SearchItem searchItem) {
23 | try {
24 | final String query = "SELECT search_item FROM SearchItem search_item where search_item.searchItem=:searchItem";
25 | final TypedQuery typedQuery = EMfactory.em.createQuery(query, SearchItem.class);
26 | typedQuery.setParameter("searchItem", searchItem.getSearchItem());
27 | return typedQuery.getResultList().stream().findFirst().orElse(null);
28 | } catch (Exception e) {
29 | e.printStackTrace();
30 | }
31 | return null;
32 | }
33 |
34 | public static void addNewEntry(Set termsFound, Data data) {
35 |
36 | final Result result = new Result();
37 | result.setSearchedTerms(termsFound);
38 | result.setSearchedtext(FUTURE_IMPLEMENTATION);
39 | result.setBotName(data.getBotName());
40 | result.setUrl(data.getUrl());
41 | result.setTime(df.format(dateobj).toString());
42 | SearchItem item = null;
43 | for (SearchItem searchItem : termsFound) {
44 | item = getSearchItem(searchItem);
45 | if(item != null) {
46 | searchItem.setId(item.getId());
47 | }
48 | searchItem.addResult(result);
49 | }
50 | if(item != null) {
51 | DaoUtil.merge(result);
52 | } else {
53 | DaoUtil.insert(result);
54 | }
55 |
56 | }
57 |
58 | public static void addNewEntry(Set termsFound, String url) {
59 |
60 | final Result result = new Result();
61 | result.setSearchedTerms(termsFound);
62 | result.setSearchedtext(FUTURE_IMPLEMENTATION);
63 | result.setBotName("Future");
64 | result.setUrl(url);
65 | result.setTime(df.format(dateobj).toString());
66 | for (SearchItem searchItem : termsFound) {
67 | final SearchItem item = getSearchItem(searchItem);
68 | searchItem.setId(item.getId());
69 | searchItem.addResult(result);
70 | }
71 | DaoUtil.insert(result);
72 | }
73 |
74 | public static void addNewEntry(Set termsFound, String url, String botName) {
75 |
76 | final Result result = new Result();
77 | result.setSearchedTerms(termsFound);
78 | result.setSearchedtext(FUTURE_IMPLEMENTATION);
79 | result.setBotName(botName);
80 | result.setUrl(url);
81 | result.setTime(df.format(dateobj).toString());
82 | for (SearchItem searchItem : termsFound) {
83 | final SearchItem item = getSearchItem(searchItem);
84 | searchItem.setId(item.getId());
85 | searchItem.addResult(result);
86 | }
87 | DaoUtil.insert(result);
88 | }
89 |
90 | public static void addNewEntry(Set termsFound, String url, String botName, Boolean isValid) {
91 | final Result result = new Result();
92 | result.setSearchedTerms(termsFound);
93 | result.setSearchedtext(FUTURE_IMPLEMENTATION);
94 | result.setBotName(botName);
95 | result.setUrl(url);
96 | result.setTime(df.format(dateobj).toString());
97 | result.setIsValid(isValid);
98 | for (SearchItem searchItem : termsFound) {
99 | final SearchItem item = getSearchItem(searchItem);
100 | searchItem.setId(item.getId());
101 | searchItem.addResult(result);
102 | }
103 | DaoUtil.insert(result);
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/utilities/Git.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.utilities;
2 |
3 |
4 | import java.io.BufferedReader;
5 | import java.io.IOException;
6 | import java.io.InputStream;
7 | import java.io.InputStreamReader;
8 | import java.nio.file.Files;
9 | import java.nio.file.Path;
10 | import java.nio.file.Paths;
11 | import java.util.Objects;
12 |
/**
 * Minimal wrapper around the command-line {@code git} binary. Every helper
 * shells out through {@link #runCommand(Path, String...)}, waits for the
 * process to finish, and discards its output.
 */
public class Git {

    /** Usage example: create a repo, add a file, commit it. */
    private static void initAndAddFile() throws IOException, InterruptedException {
        Path workDir = Paths.get("c:\\temp\\example");
        Files.createDirectories(workDir);
        gitInit(workDir);
        Files.write(workDir.resolve("example.txt"), new byte[0]);
        gitStage(workDir);
        gitCommit(workDir, "Add example.txt");
    }

    /** Usage example: clone a repo, add a file, commit and push it. */
    private static void cloneAndAddFile() throws IOException, InterruptedException {
        String originUrl = "https://github.com/Crydust/TokenReplacer.git";
        Path workDir = Paths.get("c:\\temp\\TokenReplacer");
        gitClone(workDir, originUrl);
        Files.write(workDir.resolve("example.txt"), new byte[0]);
        gitStage(workDir);
        gitCommit(workDir, "Add example.txt");
        gitPush(workDir);
    }

    public static void gitInit(Path directory) throws IOException, InterruptedException {
        runCommand(directory, "git", "init");
    }

    public static void gitStage(Path directory) throws IOException, InterruptedException {
        runCommand(directory, "git", "add", "-A");
    }

    public static void gitCommit(Path directory, String message) throws IOException, InterruptedException {
        runCommand(directory, "git", "commit", "-m", message);
    }

    public static void gitPush(Path directory) throws IOException, InterruptedException {
        runCommand(directory, "git", "push");
    }

    public static void gitClone(Path directory, String originUrl) throws IOException, InterruptedException {
        // Clone into the parent so git itself creates the target directory.
        runCommand(directory.getParent(), "git", "clone", originUrl, directory.getFileName().toString());
    }

    /**
     * Runs {@code command} inside {@code directory}, draining stdout/stderr on
     * background threads, and fails with an AssertionError on non-zero exit.
     */
    public static void runCommand(Path directory, String... command) throws IOException, InterruptedException {
        Objects.requireNonNull(directory, "directory");
        if (!Files.exists(directory)) {
            throw new RuntimeException("can't run command in non-existing directory '" + directory + "'");
        }

        Process process = new ProcessBuilder()
                .command(command)
                .directory(directory.toFile())
                .start();
        // Drain both streams concurrently so the child cannot block on a full pipe.
        StreamGobbler stderrDrain = new StreamGobbler(process.getErrorStream(), "ERROR");
        StreamGobbler stdoutDrain = new StreamGobbler(process.getInputStream(), "OUTPUT");
        stdoutDrain.start();
        stderrDrain.start();
        int exitCode = process.waitFor();
        stderrDrain.join();
        stdoutDrain.join();
        process.destroy();
        if (exitCode != 0) {
            throw new AssertionError(String.format("runCommand returned %d", exitCode));
        }
    }

    /** Background thread that consumes an InputStream line by line. */
    private static class StreamGobbler extends Thread {

        private final InputStream is;
        private final String type;

        private StreamGobbler(InputStream is, String type) {
            this.is = is;
            this.type = type;
        }

        @Override
        public void run() {
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(is))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Output is intentionally discarded.
                }
            } catch (IOException ioe) {
                ioe.printStackTrace();
            }
        }
    }

}
104 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/utilities/HttpUtilities.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.utilities;
2 |
3 | import java.util.List;
4 | import java.io.BufferedReader;
5 | import java.io.File;
6 | import java.io.FileNotFoundException;
7 | import java.io.IOException;
8 | import java.io.InputStreamReader;
9 | import java.net.HttpURLConnection;
10 | import java.net.URISyntaxException;
11 | import java.net.URL;
12 | import java.nio.charset.Charset;
13 | import java.nio.file.Files;
14 | import java.nio.file.Paths;
15 | import java.util.ArrayList;
16 | import java.util.Random;
17 | import java.util.Scanner;
18 |
19 | import com.google.common.io.Resources;
20 |
21 |
22 |
23 |
24 | public class HttpUtilities {
25 |
26 |
27 |
28 |
29 | private static Random rand = new Random();
30 |
31 |
32 |
33 |
34 | public static void main(String [] args) throws IOException, InterruptedException
35 | {
36 | //ConfigParams.initialzie();
37 | HttpUtilities test=new HttpUtilities();
38 | System.out.println(test.sendGet("https://pastebin.com/archive"));
39 | //Search.extractRegexMatches(test.sendGet("https://pastebin.com/archive"), "href=\"/(\\w{8})\">");
40 | //HttpUtilities.parse("href=\"/(\\w{8})\">", test.sendGet("https://pastebin.com/archive"));
41 |
42 | }
43 |
44 |
45 |
46 | public static String sendGet(String url) throws InterruptedException
47 | {
48 |
49 |
50 |
51 | StringBuffer response =null;
52 | int numberofattempts=0;
53 | boolean recievedResponse=false;
54 |
55 | while(numberofattempts<5 && !recievedResponse)
56 | {
57 |
58 | try {
59 | URL obj = new URL(url);
60 | HttpURLConnection con = (HttpURLConnection) obj.openConnection();
61 | con.setRequestMethod("GET");
62 | con.setRequestProperty("User-Agent", ConfigData.userAgents.get(rand.nextInt(ConfigData.userAgents.size())));
63 | //con.setRequestProperty("User-Agent", User_Agent);
64 | BufferedReader in = new BufferedReader(
65 | new InputStreamReader(con.getInputStream()));
66 | String inputLine;
67 | response = new StringBuffer();
68 |
69 | while ((inputLine = in.readLine()) != null) {
70 | response.append(inputLine);
71 | }
72 | in.close();
73 | con.disconnect();
74 | recievedResponse=true;
75 | } catch (Exception e) {
76 | // TODO Auto-generated catch block
77 | //e.printStackTrace();
78 | numberofattempts+=1;
79 | if(numberofattempts==5)
80 | {
81 | System.out.println("Tried "+numberofattempts+" times and could not fetch data for :"+url);
82 | }
83 | Thread.sleep(10000);
84 | }
85 | }
86 | //print result
87 | if(response!=null)
88 | {
89 | return response.toString();
90 | }
91 | else
92 | {
93 | return "Failed to fetch response for url :"+url;
94 | }
95 | }
96 |
97 | }
98 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/utilities/PastieParseAndSearch.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.utilities;
2 |
3 | import java.io.IOException;
4 | import java.util.ArrayList;
5 |
6 | import org.kafkaparser.pojo.Data;
7 |
8 | public class PastieParseAndSearch {
9 |
10 | static
11 | {
12 | try {
13 | ConfigParams.initialzie();
14 | } catch (IOException e) {
15 | // TODO Auto-generated catch block
16 | e.printStackTrace();
17 | }
18 | }
19 |
20 | public static ArrayList fetchids(String archiveUrl,String regex) throws IOException, InterruptedException
21 | {
22 | //ConfigParams.initialzie();
23 | //System.out.println(archiveUrl);
24 | //System.out.println(HttpUtilities.sendGet(archiveUrl));
25 | ArrayList ids= Search.extractRegexMatches(HttpUtilities.sendGet(archiveUrl), regex);
26 | /**for (String each:ids)
27 | {
28 | System.out.println(each);
29 | }**/
30 | return ids;
31 | }
32 |
33 | public static void searchEachid(Data data) throws IOException
34 | {
35 | Search.find(data);
36 | /**
37 | if(termsFound.size()<=0)
38 | return false;
39 | else
40 | return true;
41 | */
42 | }
43 |
44 | public static void main (String [] args) throws InterruptedException
45 | {
46 | ArrayList ids;
47 | try {
48 | //ids = PastieParseAndSearch.fetchids("https://pastebin.com/archive", "href=\"/(\\w{8})\">");
49 | ids = PastieParseAndSearch.fetchids("https://slexy.org/recent", "View paste");
50 |
51 |
52 | ArrayList list = new ArrayList();
53 | list.add("method434");
54 | for(String id:ids)
55 | {
56 | System.out.println("Testing currently"+id);
57 | // Data temp=PastieParseAndSearch.searchEachid(new Data("https://pastebin.com/raw/{id}".replace("{id}", id),list));
58 | // if(temp!=null)
59 | // {
60 | // System.out.print("found in "+temp.getUrl());
61 | // }
62 | // else
63 | // {
64 | // System.out.println("notfound");
65 | // }
66 | }
67 | } catch (IOException e) {
68 | // TODO Auto-generated catch block
69 | e.printStackTrace();
70 | }
71 | }
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/utilities/Search.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.utilities;
2 |
3 | import java.io.IOException;
4 | import java.text.DateFormat;
5 | import java.text.SimpleDateFormat;
6 | import java.util.ArrayList;
7 | import java.util.Date;
8 | import java.util.regex.Pattern;
9 | import org.kafkaparser.pojo.Data;
10 |
11 | import java.util.regex.Matcher;
12 |
13 | public class Search
14 | {
15 |
16 | private static DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
17 | private static Date date = new Date();
18 | private static DateFormat df = new SimpleDateFormat("dd/MM/yy HH:mm:ss");
19 | private static Date dateobj = new Date();
20 |
21 |
22 | public static void find(Data data) throws IOException
23 | {
24 | //System.out.println(data.getUrl());
25 | SearchThread searchThread=new SearchThread();
26 | searchThread.initialize(data);
27 | new Thread(searchThread).start();
28 | /**
29 | String response=HttpUtilities.sendGet(data.getUrl());
30 | ArrayList termsFound = new ArrayList();
31 | for(String s:data.getSearchTerms())
32 | {
33 | if(response.contains(s))
34 | {
35 | termsFound.add(s);
36 | }
37 | }
38 | if(termsFound.size()>0)
39 | {
40 | //check if multiple threads are resulting in reading same data again and again over ok
41 | System.out.println(df.format(dateobj)+"found in **************************************************"+data.getUrl()+" data found is "+termsFound.get(0));**/
42 |
43 | /**if(!DaoUtil.searchDuplicateByUrl(data.getUrl()))
44 | {
45 | EmailUtility.sendEmailUsingGmail("Later", data.getUrl(), termsFound);
46 | Result result = new Result();
47 | result.setSearchedTerms(termsFound);
48 | result.setSearchedtext("This is for future implementation");
49 | result.setUrl(data.getUrl());
50 | result.setTime(dateFormat.format(date).toString());
51 | DaoUtil.insert(result);
52 | }
53 | }**/
54 | //return termsFound;
55 | }
56 |
57 | public static ArrayList extractRegexMatches(String response,String regex)
58 | {
59 | ArrayList matches= new ArrayList();
60 | Pattern pattern = Pattern.compile(regex,Pattern.MULTILINE);
61 | Matcher matcher = pattern.matcher(response);
62 | while (matcher.find()) {
63 | matches.add(matcher.group(1));
64 | }
65 |
66 | return matches;
67 | }
68 |
69 |
70 |
71 | }
72 |
--------------------------------------------------------------------------------
/kafka-parser/src/main/java/org/kafkaparser/utilities/TruffleHog.java:
--------------------------------------------------------------------------------
1 | package org.kafkaparser.utilities;
2 |
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.sqlite.dataaccess.entity.SearchItem;
import org.sqlite.dataaccess.util.DaoUtil;
14 |
15 | public class TruffleHog implements Runnable {
16 |
17 | private static String regexForSecret = "stringsFound\": (.*)}";
18 |
19 | private String pastielink;
20 | private String searchTerm;
21 | private String profile;
22 | private String regex;
23 | private String entropy;
24 | private String botName;
25 | private String filePath;
26 |
27 | public void initilaize(String filePath, String pastielink, String searchTerm, String profile, String regex,
28 | String entropy) {
29 | this.filePath = filePath;
30 | this.pastielink = pastielink;
31 | this.searchTerm = searchTerm;
32 | this.profile = profile;
33 | this.regex = regex;
34 | this.entropy = entropy;
35 |
36 | }
37 |
38 | public Set getSecrets() throws IOException, InterruptedException {
39 | final Set secretSet = new HashSet();
40 |
41 | if (regex.equals("false")) {
42 | regex = "";
43 | } else {
44 | regex = "--regex";
45 | }
46 | String[] cmd = {
47 | // "/usr/local/bin/python2.7",
48 | // "/usr/bin/python2.7",
49 | // "/Users/n0r00ij/Downloads/truffleHog-dev/truffleHog/truffleHog/truffleHog.py",
50 | // ConfigData.pythonPath,
51 | // ConfigData.trufflehogPath,
52 | "trufflehog", regex, "--cleanup", "--entropy=" + entropy, "--json", "file://" + filePath };
53 |
54 | Process p = Runtime.getRuntime().exec(cmd);
55 | // p.waitFor();
56 | BufferedReader bri = new BufferedReader(new InputStreamReader(p.getInputStream()));
57 | BufferedReader bre = new BufferedReader(new InputStreamReader(p.getErrorStream()));
58 | String line;
59 | while ((line = bri.readLine()) != null) {
60 | // System.out.println(line);
61 | // System.out.println();
62 | secretSet.addAll(extractRegexMatches(line, regexForSecret));
63 | // System.out.println(line);
64 |
65 | }
66 | bri.close();
67 | while ((line = bre.readLine()) != null) {
68 | // System.out.println(line);
69 | secretSet.addAll(extractRegexMatches(line, regexForSecret));
70 | // System.out.println(line);
71 |
72 | }
73 | bre.close();
74 | p.waitFor(5, TimeUnit.MINUTES);
75 |
76 | p.destroyForcibly();
77 |
78 | // p.destroy();
79 |
80 | Boolean is_Valid = false;
81 | if (secretSet.size() > 0) {
82 | System.out.println("Issues have been found ************* Sending email");
83 | Set temp = new HashSet();
84 | temp.add(pastielink);
85 | EmailUtility.sendEmailUsingGmail(profile, temp, searchTerm);
86 | is_Valid = true;
87 | }
88 |
89 | /**
90 | * if(regex.toLowerCase().equals("false") &&
91 | * this.entropy.toLowerCase().equals("false")) {
92 | *
93 | * }
94 | **/
95 |
96 | if (!DaoUtil.searchDuplicateByUrl(pastielink)) {
97 | DbUtil.addNewEntry(secretSet, pastielink, profile, is_Valid);
98 |
99 | }
100 | return secretSet;
101 | }
102 |
103 | public static Set extractRegexMatches(String line, String regex) {
104 | final Set matchSet = new HashSet();
105 | Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
106 | Matcher matcher = pattern.matcher(line);
107 | while (matcher.find()) {
108 | final SearchItem searchItem = new SearchItem();
109 | searchItem.setSearchItem(matcher.group(1));
110 | matchSet.add(searchItem);
111 | }
112 | return matchSet;
113 | }
114 |
115 | @Override
116 | public void run() {
117 | // TODO Auto-generated method stub
118 | try {
119 | getSecrets();
120 | } catch (IOException | InterruptedException e) {
121 | // TODO Auto-generated catch block
122 | e.printStackTrace();
123 | }
124 |
125 | }
126 |
127 | /**
128 | * public static void main(String args[]) { try {
129 | * System.out.println(getSecrets("https://github.com/cogdog/tweets.git")); }
130 | * catch (IOException e) { // TODO Auto-generated catch block
131 | * e.printStackTrace(); } catch (InterruptedException e) { // TODO
132 | * Auto-generated catch block e.printStackTrace(); } }
133 | **/
134 |
135 | }
136 |
--------------------------------------------------------------------------------
/kafka-parser/src/test/java/org/kafka/parser/AppTest.java:
--------------------------------------------------------------------------------
1 | package org.kafka.parser;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
/**
 * Unit test for simple App.
 *
 * Maven-archetype scaffold test: it contains a single always-passing check
 * and exists mainly so the module's test phase has something to run.
 */
public class AppTest 
    extends TestCase
{
    /**
     * Create the test case
     *
     * @param testName name of the test case
     */
    public AppTest( String testName )
    {
        super( testName );
    }

    /**
     * @return the suite of tests being tested
     */
    public static Test suite()
    {
        return new TestSuite( AppTest.class );
    }

    /**
     * Rigourous Test :-)
     */
    public void testApp()
    {
        // Placeholder assertion; replace with real coverage.
        assertTrue( true );
    }
}
39 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 4.0.0
4 |
5 | com.rts.scrap
6 | rts
7 | 1.0-SNAPSHOT
8 | pom
9 |
10 |
11 | sqlite-dataaccess
12 | kafka-parser
13 | rts-base
14 | rts-impl
15 | scraptool
16 |
17 |
18 |
19 |
20 |
21 | junit
22 | junit
23 | 4.12
24 | test
25 |
26 |
27 |
28 |
29 |
30 | 1.8
31 | 1.8
32 | UTF-8
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/rts-base/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/rts-base/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/rts-base/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | rts-base
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.jdt.core.javanature
21 | org.eclipse.m2e.core.maven2Nature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/rts-base/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/test/java=UTF-8
4 | encoding/=UTF-8
5 |
--------------------------------------------------------------------------------
/rts-base/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/rts-base/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.rts.scrap
7 | rts
8 | 1.0-SNAPSHOT
9 |
10 | com.rts.scrap
11 | rts-base
12 | 1.0-SNAPSHOT
13 | rts-base
14 | http://maven.apache.org
15 |
16 | UTF-8
17 |
18 |
19 |
20 | junit
21 | junit
22 | 3.8.1
23 | test
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/rts-base/src/main/java/org/rts/base/Scrapper.java:
--------------------------------------------------------------------------------
1 | package org.rts.base;
2 |
3 | import java.util.Properties;
4 |
/**
 * A Scrapper is an entity that scrapes a given source (pastie, twitter,
 * github, etc.). All scrapers have to implement this interface; doing so is
 * required for consistency and to allow implementations to be loaded
 * dynamically. To extend the framework, implement this interface together
 * with the ScrapperProfile interface.
 **/
public interface Scrapper extends Runnable {

	// Initializes the scrapper from its profile properties.

	public void initScrapper(Properties prop);


	// Does the actual work, interacting with Kafka.
	public void kickOffActualWork();

	// Stops the scrapper from running.
	public boolean stopScrapper();

	// Restarts the scrapper.
	public boolean restart();

}
27 |
--------------------------------------------------------------------------------
/rts-base/src/main/java/org/rts/base/ScrapperImpl.java:
--------------------------------------------------------------------------------
1 | package org.rts.base;
2 |
3 | import java.io.ByteArrayInputStream;
4 | import java.io.File;
5 | import java.io.IOException;
6 | import java.nio.file.Files;
7 | import java.util.HashMap;
8 | import java.util.Map;
9 | import java.util.Properties;
10 |
11 | import org.rts.base.profileregistry.ScrapperProfileRegistry;
12 | import org.rts.base.utilities.PropertyUtilities;;
13 |
14 | public class ScrapperImpl {
15 |
16 | public boolean initlialized;
17 | private Map scrapperMap = new HashMap();
18 | private static ScrapperImpl instance;
19 | private Properties properties = new Properties();
20 |
21 | public Map getScrapperMap()
22 | {
23 | return scrapperMap;
24 | }
25 |
26 | public static synchronized ScrapperImpl getInstance() {
27 | if (instance == null) {
28 | instance = new ScrapperImpl();
29 | }
30 | return instance;
31 | }
32 |
33 | public synchronized void initialize (File configDirectory) throws IOException
34 | {
35 | Properties prop = new Properties();
36 | prop.setProperty("configDirectory", configDirectory.getAbsolutePath());
37 | prop.load(new ByteArrayInputStream(Files.readAllBytes(new File(configDirectory, "scanner-configuration.properties").toPath())));
38 | initialize(prop);
39 |
40 | }
41 |
42 | public synchronized void initialize (Properties prop)throws IOException
43 | {
44 | properties = new Properties();
45 | properties.putAll(prop);
46 | Map scannerPropertiesMap = PropertyUtilities.propertiesGroupByFirstDot(PropertyUtilities.filterAndShiftByFirstDot(properties, "scrapper"));
47 | try{
48 | for(Map.Entry entry:scannerPropertiesMap.entrySet())
49 | {
50 | String profile = entry.getValue().getProperty("profile");
51 | scrapperMap.put(profile, ScrapperProfileRegistry.newScrapProfile(profile, entry.getValue()));
52 | }
53 | }
54 |
55 | finally
56 | {
57 | // This is yet to be implemented and i will decide on this later man
58 | }
59 | initlialized=true;
60 |
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/rts-base/src/main/java/org/rts/base/ScrapperProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.base;
2 |
3 | import java.util.Properties;
4 |
5 | /**
6 | * @author rudrapna
7 | *ScrapperProfile interface is implemented by profilers of Scrappers whicg returns new instance of a Scrapper. All Scrappers are loaded dynamically
8 | *using class loader.
9 | */
10 |
11 | public interface ScrapperProfile {
12 |
13 | // This function is used to get the name of
14 | public String getName();
15 |
16 | Scrapper newInstance(Properties properties);
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/rts-base/src/main/java/org/rts/base/exceptions/ScrapperNotvalidException.java:
--------------------------------------------------------------------------------
1 | package org.rts.base.exceptions;
2 |
/**
 * Thrown when a scrapper definition is invalid (e.g. an unknown or
 * misconfigured profile). Unchecked, matching the framework's error style.
 */
public class ScrapperNotvalidException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    /** Creates the exception with no detail message (original behavior). */
    public ScrapperNotvalidException() {
    }

    /** Creates the exception with a detail message describing the invalid scrapper. */
    public ScrapperNotvalidException(String message) {
        super(message);
    }

    /** Creates the exception with a detail message, preserving the underlying cause. */
    public ScrapperNotvalidException(String message, Throwable cause) {
        super(message, cause);
    }

}
10 |
--------------------------------------------------------------------------------
/rts-base/src/main/java/org/rts/base/profileregistry/ScrapperProfileRegistry.java:
--------------------------------------------------------------------------------
1 | package org.rts.base.profileregistry;
2 |
3 | import java.util.HashMap;
4 | import java.util.Map;
5 | import java.util.Properties;
6 | import java.util.ServiceLoader;
7 |
8 | import org.rts.base.Scrapper;
9 | import org.rts.base.ScrapperProfile;
10 |
11 | public class ScrapperProfileRegistry {
12 |
13 | static Map scrapperProfileMap = new HashMap();
14 |
15 | static {
16 | //This is a service loader used to load all scanning profiless
17 | ServiceLoader loadSP = ServiceLoader.load(ScrapperProfile.class, ScrapperProfile.class.getClassLoader());
18 | for (ScrapperProfile scrapperProfile : loadSP) {
19 | scrapperProfileMap.put(scrapperProfile.getName(), scrapperProfile);
20 | }
21 | }
22 |
23 | public static Scrapper newScrapProfile(String profile,Properties properties)
24 | {
25 | ScrapperProfile sp=scrapperProfileMap.get(profile);
26 | if (sp==null)
27 | {
28 | throw new RuntimeException("ScannerProfile "+profile+" is not defined");
29 | }
30 | return sp.newInstance(properties);
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/rts-base/src/main/java/org/rts/base/utilities/PropertyUtilities.java:
--------------------------------------------------------------------------------
1 | package org.rts.base.utilities;
2 |
3 | import java.util.HashMap;
4 | import java.util.Map;
5 | import java.util.Properties;
6 | import java.util.regex.Pattern;
7 |
8 |
/**
 * Helpers for slicing java.util.Properties keys on their first dot, used to
 * group per-scrapper configuration out of one flat properties file.
 */
public class PropertyUtilities {
    // Compiled once: splitting runs in loops over every property key.
    private static final Pattern DOT_REGEXP = Pattern.compile("\\.");

    /**
     * Returns the entries whose key's segment before the first dot equals
     * {@code prefix}, with that segment (and the dot) removed from the key.
     * A key exactly equal to {@code prefix} maps to the empty-string key.
     * Example: {"a.b"=1, "c.d"=2} filtered by "a" -> {"b"=1}.
     *
     * @param p      source properties (not modified)
     * @param prefix segment to match before the first dot
     * @return a new Properties holding the matching, shifted entries
     */
    public static Properties filterAndShiftByFirstDot(Properties p, String prefix) {
        Properties result = new Properties();
        for (String key : p.stringPropertyNames()) {
            String[] split = splitByFirstDot(key);
            if (prefix.equals(split[0])) {
                // A key with no dot after the prefix maps to the empty remainder.
                String shiftedKey = split.length > 1 ? split[1] : "";
                result.setProperty(shiftedKey, p.getProperty(key));
            }
        }
        return result;
    }

    /**
     * Groups entries by the key segment before the first dot; each group's
     * Properties holds the key remainders.
     * Example: {"x.a"=1, "x.b"=2, "y.a"=3} -> {"x"={"a"=1,"b"=2}, "y"={"a"=3}}.
     *
     * @param p source properties (not modified)
     * @return map from first segment to the shifted entries of that group
     */
    public static Map<String, Properties> propertiesGroupByFirstDot(Properties p) {
        Map<String, Properties> result = new HashMap<String, Properties>();
        for (String key : p.stringPropertyNames()) {
            String[] split = splitByFirstDot(key);
            String groupKey = split[0];
            String shiftedKey = split.length > 1 ? split[1] : "";
            // Single lookup instead of containsKey + get.
            Properties group = result.get(groupKey);
            if (group == null) {
                group = new Properties();
                result.put(groupKey, group);
            }
            group.setProperty(shiftedKey, p.getProperty(key));
        }
        return result;
    }

    /** Splits on the first dot only: "a.b.c" -> ["a", "b.c"]; "nodot" -> ["nodot"]. */
    public static String[] splitByFirstDot(String s) {
        return DOT_REGEXP.split(s, 2);
    }
}
47 |
--------------------------------------------------------------------------------
/rts-base/src/test/java/org/rts/base/AppTest.java:
--------------------------------------------------------------------------------
1 | package org.rts.base;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigourous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/rts-impl/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/rts-impl/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/rts-impl/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | rts-impl
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.jdt.core.javanature
21 | org.eclipse.m2e.core.maven2Nature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/rts-impl/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/main/resources=UTF-8
4 | encoding//src/test/java=UTF-8
5 | encoding/=UTF-8
6 |
--------------------------------------------------------------------------------
/rts-impl/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/rts-impl/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.rts.scrap
7 | rts
8 | 1.0-SNAPSHOT
9 |
10 | com.rts.scrap
11 | rts-impl
12 | 1.0-SNAPSHOT
13 | rts-impl
14 |
15 |
16 | http://maven.apache.org
17 |
18 | UTF-8
19 |
20 |
21 |
22 |
23 | junit
24 | junit
25 | 4.9
26 | test
27 |
28 |
29 |
30 | log4j
31 | log4j
32 | 1.2.17
33 |
34 |
35 | commons-cli
36 | commons-cli
37 | 1.4
38 |
39 |
40 | com.github.jreddit
41 | jreddit
42 | 1.0.3
43 |
44 |
45 | com.googlecode.json-simple
46 | json-simple
47 | 1.1
48 |
49 |
50 | com.rts.scrap
51 | kafka-parser
52 | 1.0-SNAPSHOT
53 |
54 |
55 |
56 |
57 | com.rts.scrap
58 | rts-base
59 | 1.0-SNAPSHOT
60 |
61 |
62 | org.twitter4j
63 | twitter4j-stream
64 | 4.0.6
65 |
66 |
67 |
68 |
69 |
70 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/impl/GithubImpl.java:
--------------------------------------------------------------------------------
1 | package org.rts.impl;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Arrays;
5 | import java.util.HashSet;
6 | import java.util.Properties;
7 | import java.util.Set;
8 |
9 | import org.apache.log4j.Logger;
10 | import org.kafkaparser.utilities.DbUtil;
11 | import org.kafkaparser.utilities.EmailUtility;
12 | import org.rts.base.Scrapper;
13 | import org.rts.utilities.JsonParserForGithub;
14 | import org.rts.utilities.TruffleHog;
15 | import org.sqlite.dataaccess.entity.SearchItem;
16 | import org.sqlite.dataaccess.util.DaoUtil;
17 |
18 | public class GithubImpl implements Scrapper {
19 |
20 | private String baseurl;
21 | private String access_token;
22 | private String timetoSleep;
23 | private ArrayList searchTerms=new ArrayList();
24 | final static Logger logger = Logger.getLogger(GithubImpl.class);
25 | private String profile ="";
26 | private String trufflehogregex="false";
27 | private String trufflehogentropy="false";
28 |
29 | public void run() {
30 | // TODO Auto-generated method stub
31 | kickOffActualWork();
32 |
33 | }
34 |
35 | public void initScrapper(Properties prop)
36 | {
37 | // TODO Auto-generated method stub
38 | this.baseurl=prop.getProperty("baseurl");
39 | this.access_token=prop.getProperty("access_token");
40 | this.timetoSleep=prop.getProperty("timetosleep");
41 | this.searchTerms=new ArrayList(Arrays.asList(prop.getProperty("searchterms").split("\\s*,\\s*")));
42 | this.profile= prop.getProperty("profile");
43 | this.trufflehogregex=prop.getProperty("trufflehogregex").toLowerCase();
44 | this.trufflehogentropy=prop.getProperty("trufflehogentropy").toLowerCase();
45 | }
46 |
47 | public void kickOffActualWork() {
48 | System.out.println("Kicked off github");
49 |
50 |
51 | while(true)
52 | {
53 | try {
54 | for (String searchTerm : searchTerms)
55 | {
56 | Set alertSet=JsonParserForGithub.githubUrlFetcher(baseurl.replace("{searchTerm}", searchTerm.replace(" ", "%20"))+"&access_token="+access_token);
57 | System.out.println("Got url" + alertSet);
58 | ArrayList threads= new ArrayList<>();
59 | if(trufflehogregex.equals("true") || trufflehogentropy.equals("true"))
60 | {
61 | for(String url:alertSet)
62 | {
63 |
64 | {
65 |
66 | if(!DaoUtil.searchDuplicateByUrl(url))
67 | {
68 | System.out.println("Analyzing url************" + url);
69 | TruffleHog truffleHogThread = new TruffleHog();
70 | truffleHogThread.initilaize(url, searchTerm,profile,trufflehogregex,trufflehogentropy);
71 | Thread t = new Thread(truffleHogThread);
72 | threads.add(t);
73 | t.start();
74 | //while(t.isAlive());//This is a bad idea waiting for every thread man
75 | }
76 | }
77 |
78 | }
79 | }
80 |
81 | else if(alertSet.size()>0)
82 | {
83 | Set filteredalertSet = new HashSet();
84 | for(String url:alertSet)
85 | {
86 | if(!DaoUtil.searchDuplicateByUrl(url))
87 | {
88 | //System.out.println("Comparing url" + url);
89 | filteredalertSet.add(url);
90 | }
91 | }
92 | EmailUtility.sendEmailUsingGmail(profile, filteredalertSet, searchTerm);
93 | for(String url:filteredalertSet)
94 | {
95 | if(!DaoUtil.searchDuplicateByUrl(url))
96 | {
97 | final Set temp=new HashSet();
98 | final SearchItem searchItem = new SearchItem();
99 | searchItem.setSearchItem(searchTerm);
100 | temp.add(searchItem);
101 | DbUtil.addNewEntry(temp, url,profile);
102 |
103 | }
104 | }
105 | }
106 |
107 |
108 |
109 |
110 | /**if(filteredalertSet.size()>0)
111 | {
112 | //System.out.println("Reuqired terms have been found");
113 | EmailUtility.sendEmailUsingGmail(profile, filteredalertSet, searchterm);
114 | for(String url:filteredalertSet)
115 | {
116 | if(!DaoUtil.searchDuplicateByUrl(url))
117 | {
118 | ArrayList temp=new ArrayList();
119 | temp.add(searchterm);
120 | DbUtil.addNewEntry(temp, url,profile);
121 |
122 | }
123 | }
124 | }**/
125 | }
126 | Thread.sleep(Integer.parseInt(timetoSleep));
127 | } catch (Exception e) {
128 | logger.error("kickOffActualWork function in GithubImpl class has thrown exception",e);
129 | }
130 |
131 | System.gc();
132 |
133 | }
134 |
135 | }
136 |
137 | public boolean stopScrapper() {
138 | // TODO Auto-generated method stub
139 | //enable multiple threads to speed up the process man
140 | return false;
141 | }
142 |
143 | public boolean restart() {
144 | // TODO Auto-generated method stub
145 | return false;
146 | }
147 |
148 | public static void main(String args[])
149 | {
150 | Thread t=new Thread(new GithubImpl());
151 | t.run();
152 | }
153 |
154 | }
155 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/impl/PastieImpl.java:
--------------------------------------------------------------------------------
1 | package org.rts.impl;
2 |
3 | import java.io.IOException;
4 | import java.util.ArrayList;
5 | import java.util.Arrays;
6 | import java.util.HashSet;
7 | import java.util.Iterator;
8 | import java.util.Properties;
9 | import java.util.Queue;
10 | import java.util.Set;
11 | import org.apache.log4j.Logger;
12 | import com.google.common.collect.EvictingQueue;
13 | import org.kafkaparser.base.Producer;
14 | import org.rts.base.Scrapper;
15 | import org.kafkaparser.utilities.ConfigData;
16 | import org.kafkaparser.utilities.PastieParseAndSearch;
17 | import org.kafkaparser.pojo.Data;
18 |
19 | public class PastieImpl implements Scrapper{
20 |
21 | private String homeurl;
22 | private String regex;
23 | private String downloadurl;
24 | private String timetoSleep;
25 | public Queue evictingQueue= EvictingQueue.create(600);
26 | public Set previousSet = new HashSet();
27 | public Set presentSet = new HashSet();
28 | public Set diffSet = new HashSet();
29 | private ArrayList searchTerms=new ArrayList();
30 | final static Logger logger = Logger.getLogger(PastieImpl.class);
31 | private String profile="";
32 | private String trufflehogregex="false";
33 | private String trufflehogentropy="false";
34 |
35 | public void initScrapper(Properties prop) {
36 | // TODO Auto-generated method stub
37 | //write a common function to check values of all the variables that it is populated in the config proeprties file man
38 | //maintain previous test ones and compare with present ones before making any requests again man
39 | this.regex=prop.getProperty("regex");
40 | this.downloadurl=prop.getProperty("downloadurl");
41 | this.homeurl=prop.getProperty("homeurl");
42 | this.timetoSleep=prop.getProperty("timetosleep");
43 | this.searchTerms=new ArrayList(Arrays.asList(prop.getProperty("searchterms").split("\\s*,\\s*")));
44 | this.profile=prop.getProperty("profile");
45 | this.trufflehogregex=prop.getProperty("trufflehogregex").toLowerCase();
46 | this.trufflehogentropy=prop.getProperty("trufflehogentropy").toLowerCase();
47 | }
48 |
49 | public void run() {
50 | System.out.println("Kicked off "+profile);
51 | try {
52 | while (true)
53 | {
54 | kickOffActualWork();
55 | Thread.sleep(Integer.parseInt(timetoSleep));
56 | System.gc();
57 | }
58 | } catch (InterruptedException e) {
59 | // TODO Auto-generated catch block
60 | logger.error("Pastie Impl run has failed:",e);
61 | }
62 | }
63 |
64 | public void kickOffActualWork() {
65 | // TODO Auto-generated method stub
66 |
67 | ArrayList ids = null;
68 | try {
69 | ids = PastieParseAndSearch.fetchids(homeurl,regex);
70 | presentSet = new HashSet(ids);
71 | //diffSet=Difference.getDiff(previousSet, presentSet);
72 | //previousSet = presentSet;
73 | Iterator iter = presentSet.iterator();
74 | while (iter.hasNext())
75 | { String id=(String)iter.next();
76 | if(!evictingQueue.contains(id))
77 | {
78 | Producer.send(new Data(downloadurl.replace("{id}", id),searchTerms,profile,trufflehogregex,trufflehogentropy), ConfigData.topicName);
79 | }
80 |
81 | evictingQueue.add(id);
82 | }
83 | }
84 | catch (IOException e) {
85 | // TODO Auto-generated catch block
86 | logger.error("Pastie Impl kickoff has failed:",e);
87 | } catch (InterruptedException e) {
88 | // TODO Auto-generated catch block
89 | e.printStackTrace();
90 | }
91 | }
92 |
93 | public boolean stopScrapper() {
94 | // TODO Auto-generated method stub
95 | return false;
96 | }
97 |
98 | public boolean restart() {
99 | // TODO Auto-generated method stub
100 | return false;
101 | }
102 |
103 | }
104 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/impl/RedditImpl.java:
--------------------------------------------------------------------------------
1 | package org.rts.impl;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Arrays;
5 | import java.util.HashSet;
6 | import java.util.Iterator;
7 | import java.util.Properties;
8 | import java.util.Set;
9 | import org.rts.base.Scrapper;
10 | import org.kafkaparser.utilities.DbUtil;
11 | import org.kafkaparser.utilities.EmailUtility;
12 | import org.rts.utilities.JsonParserForReddit;
13 | import org.rts.utilities.TruffleHog;
14 | import org.sqlite.dataaccess.entity.SearchItem;
15 | import org.sqlite.dataaccess.util.DaoUtil;
16 |
17 | public class RedditImpl implements Scrapper {
18 |
19 | // implement array of searchterms needed here man
20 | private String baseurl;
21 | private String timetoSleep;
22 | private ArrayList searchTerms = new ArrayList();
23 | private String profile = "";
24 | private String trufflehogregex = "false";
25 | private String trufflehogentropy = "false";
26 |
27 | public void run() {
28 | // TODO Auto-generated method stub
29 | kickOffActualWork();
30 |
31 | }
32 |
33 | public void initScrapper(Properties prop) {
34 | this.baseurl = prop.getProperty("baseurl");
35 | this.timetoSleep = prop.getProperty("timetosleep");
36 | this.searchTerms = new ArrayList(Arrays.asList(prop.getProperty("searchterms").split("\\s*,\\s*")));
37 | this.profile = prop.getProperty("profile");
38 | this.trufflehogregex = prop.getProperty("trufflehogregex").toLowerCase();
39 | this.trufflehogentropy = prop.getProperty("trufflehogentropy").toLowerCase();
40 |
41 | }
42 |
43 | public void kickOffActualWork() {
44 | while (true) {
45 | try {
46 | for (String searchTerm : searchTerms) {
47 | Set alertSet = JsonParserForReddit
48 | .redditUrlFetcher(baseurl.replace("{searchterm}", searchTerm.replace(" ", "%20")));
49 | ArrayList threads = new ArrayList<>();
50 | if (trufflehogregex.equals("true") || trufflehogentropy.equals("true")) {
51 | for (String url : alertSet) {
52 |
53 | {
54 |
55 | if (!DaoUtil.searchDuplicateByUrl(url)) {
56 | System.out.println("Analyzing url************" + url);
57 | TruffleHog truffleHogThread = new TruffleHog();
58 | truffleHogThread.initilaize(url, searchTerm, profile, trufflehogregex,
59 | trufflehogentropy);
60 | Thread t = new Thread(truffleHogThread);
61 | threads.add(t);
62 | t.start();
63 | }
64 | }
65 |
66 | }
67 | }
68 |
69 | else if (alertSet.size() > 0) {
70 | Set filteredalertSet = new HashSet();
71 | for (String url : alertSet) {
72 | if (!DaoUtil.searchDuplicateByUrl(url)) {
73 | // System.out.println("Comparing url" + url);
74 | filteredalertSet.add(url);
75 | }
76 | }
77 | EmailUtility.sendEmailUsingGmail(profile, filteredalertSet, searchTerm);
78 | for (String url : filteredalertSet) {
79 | if (!DaoUtil.searchDuplicateByUrl(url)) {
80 | final Set temp = new HashSet();
81 | final SearchItem searchItem = new SearchItem();
82 | searchItem.setSearchItem(searchTerm);
83 | temp.add(searchItem);
84 | DbUtil.addNewEntry(temp, url, profile);
85 |
86 | }
87 | }
88 | }
89 |
90 | }
91 | Thread.sleep(Integer.parseInt(timetoSleep));
92 | } catch (Exception e) {
93 | // TODO Auto-generated catch block
94 | e.printStackTrace();
95 | }
96 | System.gc();
97 | }
98 |
99 | }
100 |
101 | public boolean stopScrapper() {
102 | // TODO Auto-generated method stub
103 | return false;
104 | }
105 |
106 | public boolean restart() {
107 | // TODO Auto-generated method stub
108 | return false;
109 | }
110 |
111 | }
112 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/CodepadProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 |
5 | import org.rts.base.Scrapper;
6 | import org.rts.base.ScrapperProfile;
7 | import org.rts.impl.PastieImpl;
8 |
9 | public class CodepadProfile implements ScrapperProfile{
10 |
11 | public String getName() {
12 | // TODO Auto-generated method stub
13 | return "Codepad";
14 | }
15 |
16 | public Scrapper newInstance(Properties properties) {
17 | // TODO Auto-generated method stub
18 | PastieImpl pastie=new PastieImpl();
19 | pastie.initScrapper(properties);
20 | return pastie;
21 | }
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/Dumpz.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 |
5 | import org.rts.base.Scrapper;
6 | import org.rts.base.ScrapperProfile;
7 | import org.rts.impl.PastieImpl;
8 |
9 | public class Dumpz implements ScrapperProfile{
10 |
11 | public String getName() {
12 | // TODO Auto-generated method stub
13 | return "Dumpzorg";
14 | }
15 |
16 | public Scrapper newInstance(Properties properties) {
17 | // TODO Auto-generated method stub
18 | PastieImpl pastie=new PastieImpl();
19 | pastie.initScrapper(properties);
20 | return pastie;
21 | }
22 |
23 | }
24 |
25 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/GistGithubProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 | import org.rts.base.Scrapper;
5 | import org.rts.base.ScrapperProfile;
6 | import org.rts.impl.PastieImpl;
7 |
8 | public class GistGithubProfile implements ScrapperProfile{
9 |
10 | public String getName() {
11 | // TODO Auto-generated method stub
12 | return "GistGithub";
13 | }
14 |
15 | public Scrapper newInstance(Properties properties) {
16 | // TODO Auto-generated method stub
17 | PastieImpl pastie=new PastieImpl();
18 | pastie.initScrapper(properties);
19 | return pastie;
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/GithubProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 | import org.rts.base.Scrapper;
5 | import org.rts.base.ScrapperProfile;
6 | import org.rts.impl.GithubImpl;
7 |
8 | public class GithubProfile implements ScrapperProfile{
9 |
10 | public String getName() {
11 | // TODO Auto-generated method stub
12 | return "Github";
13 | }
14 |
15 | public Scrapper newInstance(Properties properties) {
16 | // TODO Auto-generated method stub
17 |
18 | GithubImpl gitHub=new GithubImpl();
19 | gitHub.initScrapper(properties);
20 | return gitHub;
21 | }
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/IdeonecomProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 | import org.rts.base.Scrapper;
5 | import org.rts.base.ScrapperProfile;
6 | import org.rts.impl.PastieImpl;
7 |
8 | public class IdeonecomProfile implements ScrapperProfile {
9 |
10 | public String getName() {
11 | // TODO Auto-generated method stub
12 | return "Ideonecom";
13 | }
14 |
15 | public Scrapper newInstance(Properties properties) {
16 | // TODO Auto-generated method stub
17 | PastieImpl pastie=new PastieImpl();
18 | pastie.initScrapper(properties);
19 | return pastie;
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/KpastenetProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 | import org.rts.base.Scrapper;
5 | import org.rts.base.ScrapperProfile;
6 | import org.rts.impl.PastieImpl;
7 |
8 | public class KpastenetProfile implements ScrapperProfile{
9 |
10 | public String getName() {
11 | // TODO Auto-generated method stub
12 | return "Kpastenet";
13 | }
14 |
15 | public Scrapper newInstance(Properties properties) {
16 | // TODO Auto-generated method stub
17 | PastieImpl pastie=new PastieImpl();
18 | pastie.initScrapper(properties);
19 | return pastie;
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/Lpaste.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 |
5 | import org.rts.base.Scrapper;
6 | import org.rts.base.ScrapperProfile;
7 | import org.rts.impl.PastieImpl;
8 |
9 | public class Lpaste implements ScrapperProfile{
10 |
11 | public String getName() {
12 | // TODO Auto-generated method stub
13 | return "Lpaste";
14 | }
15 |
16 | public Scrapper newInstance(Properties properties) {
17 | // TODO Auto-generated method stub
18 | PastieImpl pastie=new PastieImpl();
19 | pastie.initScrapper(properties);
20 | return pastie;
21 | }
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/PastebincaProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 | import org.rts.base.Scrapper;
5 | import org.rts.base.ScrapperProfile;
6 | import org.rts.impl.PastieImpl;
7 |
8 | public class PastebincaProfile implements ScrapperProfile{
9 | public String getName() {
10 | // TODO Auto-generated method stub
11 | //check with time if pastebin.ca comess up with different results browsers and mine are not matching.. need to identify what is that extra parameter needed man
12 | return "Pastebinca";
13 | }
14 |
15 | public Scrapper newInstance(Properties properties) {
16 | // TODO Auto-generated method stub
17 | PastieImpl pastie=new PastieImpl();
18 | pastie.initScrapper(properties);
19 | return pastie;
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/PastebinfrProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 | import org.rts.base.Scrapper;
5 | import org.rts.base.ScrapperProfile;
6 | import org.rts.impl.PastieImpl;
7 |
8 | public class PastebinfrProfile implements ScrapperProfile{
9 |
10 | public String getName() {
11 | // TODO Auto-generated method stub
12 | return "Pastebinfr";
13 | }
14 |
15 | public Scrapper newInstance(Properties properties) {
16 | // TODO Auto-generated method stub
17 | PastieImpl pastie=new PastieImpl();
18 | pastie.initScrapper(properties);
19 | return pastie;
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/PastebinruProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 | import org.rts.base.Scrapper;
5 | import org.rts.base.ScrapperProfile;
6 | import org.rts.impl.PastieImpl;
7 |
8 | public class PastebinruProfile implements ScrapperProfile {
9 |
10 | public String getName() {
11 | // TODO Auto-generated method stub
12 | return "Pastebinru";
13 | }
14 |
15 | public Scrapper newInstance(Properties properties) {
16 | // TODO Auto-generated method stub
17 | PastieImpl pastie=new PastieImpl();
18 | pastie.initScrapper(properties);
19 | return pastie;
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/Pasteorgru.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 |
5 | import org.rts.base.Scrapper;
6 | import org.rts.base.ScrapperProfile;
7 | import org.rts.impl.PastieImpl;
8 |
9 | public class Pasteorgru implements ScrapperProfile {
10 |
11 | public String getName() {
12 | // TODO Auto-generated method stub
13 | return "Pasteorgru";
14 | }
15 |
16 | public Scrapper newInstance(Properties properties) {
17 | // TODO Auto-generated method stub
18 | PastieImpl pastie=new PastieImpl();
19 | pastie.initScrapper(properties);
20 | return pastie;
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/PastieProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 |
5 | import org.rts.base.Scrapper;
6 | import org.rts.base.ScrapperProfile;
7 | import org.rts.impl.PastieImpl;
8 |
9 | public class PastieProfile implements ScrapperProfile{
10 |
11 | public String getName() {
12 | // TODO Auto-generated method stub
13 | return "Pastie";
14 | }
15 |
16 | public Scrapper newInstance(Properties properties) {
17 | // TODO Auto-generated method stub
18 | PastieImpl pastie=new PastieImpl();
19 | pastie.initScrapper(properties);
20 | return pastie;
21 | }
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/RedditProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 |
5 | import org.rts.base.Scrapper;
6 | import org.rts.base.ScrapperProfile;
7 | import org.rts.impl.RedditImpl;
8 |
9 | public class RedditProfile implements ScrapperProfile {
10 |
11 | public String getName() {
12 | // TODO Auto-generated method stub
13 | return "Reddit";
14 | }
15 |
16 | public Scrapper newInstance(Properties properties) {
17 | // TODO Auto-generated method stub
18 | RedditImpl reddit=new RedditImpl();
19 | reddit.initScrapper(properties);
20 | return reddit;
21 | }
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/SlexyOrgProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 |
5 | import org.rts.base.Scrapper;
6 | import org.rts.base.ScrapperProfile;
7 | import org.rts.impl.PastieImpl;
8 |
9 | public class SlexyOrgProfile implements ScrapperProfile{
10 |
11 |
12 | public String getName() {
13 | // TODO Auto-generated method stub
14 | return "Slexyorg";
15 | }
16 |
17 | public Scrapper newInstance(Properties properties) {
18 | // TODO Auto-generated method stub
19 | PastieImpl pastie=new PastieImpl();
20 | pastie.initScrapper(properties);
21 | return pastie;
22 | }
23 |
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/Snipplr.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 |
5 | import org.rts.base.Scrapper;
6 | import org.rts.base.ScrapperProfile;
7 | import org.rts.impl.PastieImpl;
8 |
9 | public class Snipplr implements ScrapperProfile{
10 |
11 | public String getName() {
12 | // TODO Auto-generated method stub
13 | return "Snipplr";
14 | }
15 |
16 | public Scrapper newInstance(Properties properties) {
17 | // TODO Auto-generated method stub
18 | PastieImpl pastie=new PastieImpl();
19 | pastie.initScrapper(properties);
20 | return pastie;
21 | }
22 |
23 | }
24 |
25 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/rtsprofile/TwitterProfile.java:
--------------------------------------------------------------------------------
1 | package org.rts.rtsprofile;
2 |
3 | import java.util.Properties;
4 |
5 | import org.rts.base.Scrapper;
6 | import org.rts.base.ScrapperProfile;
7 | import org.rts.impl.TwitterImpl;
8 |
9 | public class TwitterProfile implements ScrapperProfile{
10 |
11 | public String getName() {
12 | // TODO Auto-generated method stub
13 | return "Twitter";
14 | }
15 |
16 | public Scrapper newInstance(Properties properties) {
17 | // TODO Auto-generated method stub
18 | TwitterImpl twitter=new TwitterImpl();
19 | twitter.initScrapper(properties);
20 | return twitter;
21 | }
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/utilities/Difference.java:
--------------------------------------------------------------------------------
1 | package org.rts.utilities;
2 |
3 | import java.util.Set;
4 |
/**
 * Small set utility used by the scrappers to work out which URLs are new
 * since the previous polling round.
 */
public class Difference
{

	/**
	 * Returns the elements of {@code presentSet} that are not contained in
	 * {@code previousSet}.
	 *
	 * <p><b>Note:</b> this mutates {@code presentSet} in place (via
	 * {@link Set#removeAll}) and returns that same instance; callers that need
	 * to keep the original set must pass a copy.
	 *
	 * @param <T>         element type (the method was previously raw-typed;
	 *                    generics are erasure-compatible with existing callers)
	 * @param previousSet elements seen in the previous round
	 * @param presentSet  elements seen in the current round; modified in place
	 * @return {@code presentSet} after removal, i.e. the newly seen elements
	 */
	public static <T> Set<T> getDiff(Set<T> previousSet, Set<T> presentSet)
	{
		presentSet.removeAll(previousSet);
		return presentSet;
	}

	/** Manual smoke-test entry point; the exercise code was removed as dead. */
	public static void main(String args [])
	{
	}
}
42 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/utilities/JsonParserForGithub.java:
--------------------------------------------------------------------------------
1 |
2 | package org.rts.utilities;
3 |
4 | import java.util.HashSet;
5 | import java.util.Iterator;
6 | import java.util.Queue;
7 | import java.util.Set;
8 | import org.json.simple.JSONArray;
9 | import org.json.simple.JSONObject;
10 | import org.json.simple.parser.JSONParser;
11 | import org.json.simple.parser.ParseException;
12 |
13 | import com.google.common.collect.EvictingQueue;
14 | import org.kafkaparser.utilities.HttpUtilities;
15 |
16 | public class JsonParserForGithub {
17 |
18 | public static JSONParser parser = new JSONParser();
19 | public static Set previousSet = new HashSet();
20 | public static Set presentSet;
21 | public static Set diffSet = new HashSet();
22 | public static Queue evictingQueue= EvictingQueue.create(100);
23 |
24 | public static Set githubUrlFetcher(String url) throws ParseException, InterruptedException
25 | {
26 | Object obj = parser.parse(HttpUtilities.sendGet(url));
27 | JSONObject jsonObject = (JSONObject) obj;
28 | JSONArray array = (JSONArray)jsonObject.get("items");
29 | Iterator> iterator = array.iterator();
30 | presentSet = new HashSet();
31 | while(iterator.hasNext())
32 | {
33 | JSONObject eachJsonObject = (JSONObject) iterator.next();
34 | JSONObject eachrepositoryObject = (JSONObject) eachJsonObject.get("repository");
35 | String html_url=(String)eachrepositoryObject.get("html_url");
36 | html_url=html_url+".git";
37 | if(!evictingQueue.contains(html_url))
38 | {
39 | //System.out.println("Github "+html_url);
40 | presentSet.add(html_url);
41 | }
42 | evictingQueue.add(html_url);
43 | //System.out.println(eachJsonObject.get("html_url"));
44 | }
45 | //diffSet=Difference.getDiff(previousSet, presentSet);
46 | //previousSet=presentSet;
47 | return presentSet;
48 | }
49 |
50 |
51 |
52 |
53 |
54 | public static void main(String args []) throws InterruptedException
55 | {
56 | try {
57 | JsonParserForGithub.githubUrlFetcher("https://api.github.com/search/code?q=olacabs&sort=indexed&order=asc&access_token=ac2536a0beb59624d879e10084fe2d04404451bf&");
58 | //JsonParser.githubUrlFetcher("https://pastebin.com/archive");
59 | } catch (ParseException e) {
60 | // TODO Auto-generated catch block
61 | e.printStackTrace();
62 | }
63 | }
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 | }
72 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/utilities/JsonParserForReddit.java:
--------------------------------------------------------------------------------
1 |
2 | package org.rts.utilities;
3 |
4 | import java.util.HashSet;
5 | import java.util.Iterator;
6 | import java.util.Queue;
7 | import java.util.Set;
8 |
9 | import org.json.simple.JSONArray;
10 | import org.json.simple.JSONObject;
11 | import org.json.simple.parser.JSONParser;
12 | import org.json.simple.parser.ParseException;
13 |
14 | import com.google.common.collect.EvictingQueue;
15 | import org.kafkaparser.utilities.HttpUtilities;
16 |
17 | public class JsonParserForReddit {
18 |
19 | public static JSONParser parser = new JSONParser();
20 | public static Set previousSet = new HashSet();
21 | public static Set presentSet;
22 | public static Set diffSet = new HashSet();
23 | public static Queue evictingQueue= EvictingQueue.create(100);
24 |
25 | public static Set redditUrlFetcher(String url) throws ParseException, InterruptedException
26 | {
27 | Object obj = parser.parse(HttpUtilities.sendGet(url));
28 | JSONArray array = (JSONArray)((JSONObject) ((JSONObject) obj).get("data")).get("children");
29 | @SuppressWarnings("unchecked")
30 | Iterator iterator = array.iterator();
31 | presentSet = new HashSet();
32 | while(iterator.hasNext())
33 | {
34 | //Danger new fix is going on man
35 | JSONObject eachJsonObject = (JSONObject) iterator.next();
36 | String redditUrl="https://www.reddit.com/"+(String)(((JSONObject)eachJsonObject.get("data")).get("permalink"));
37 | if(!evictingQueue.contains(redditUrl))
38 | {
39 | presentSet.add(redditUrl);
40 | }
41 | evictingQueue.add(redditUrl);
42 | }
43 | return presentSet;
44 | }
45 |
46 |
47 |
48 |
49 |
50 | public static void main(String args []) throws InterruptedException
51 | {
52 | try {
53 | JsonParserForReddit.redditUrlFetcher("https://www.reddit.com/search.json?q=olacabs%20hacked");
54 | //JsonParser.githubUrlFetcher("https://pastebin.com/archive");
55 | } catch (ParseException e) {
56 | // TODO Auto-generated catch block
57 | e.printStackTrace();
58 | }
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/rts-impl/src/main/java/org/rts/utilities/TruffleHog.java:
--------------------------------------------------------------------------------
1 | package org.rts.utilities;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.IOException;
5 | import java.io.InputStreamReader;
6 | import java.lang.reflect.Field;
7 | import java.util.ArrayList;
8 | import java.util.HashSet;
9 | import java.util.Set;
10 | import java.util.concurrent.TimeUnit;
11 | import java.util.regex.Matcher;
12 | import java.util.regex.Pattern;
13 |
14 | import org.kafkaparser.utilities.ConfigData;
15 | import org.kafkaparser.utilities.DbUtil;
16 | import org.kafkaparser.utilities.EmailUtility;
17 | import org.sqlite.dataaccess.entity.SearchItem;
18 | import org.sqlite.dataaccess.util.DaoUtil;
19 |
20 | public class TruffleHog implements Runnable {
21 |
22 | private static String regexForSecret = "stringsFound\": (.*)}";
23 |
24 | private String link;
25 | private String searchTerm;
26 | private String profile;
27 | private String regex;
28 | private String entropy;
29 | private String pythonPath;
30 | private String trufflehogPath;
31 |
32 | public void initilaize(String pastielink, String searchTerm, String profile, String regex, String entropy) {
33 | this.link = pastielink;
34 | this.searchTerm = searchTerm;
35 | this.profile = profile;
36 | this.regex = regex;
37 | this.entropy = entropy;
38 |
39 | }
40 |
41 | public Set getSecrets() throws IOException, InterruptedException {
42 |
43 | System.out.println("*********Entered trufflehog");
44 |
45 | final Set secrets = new HashSet();
46 |
47 | if (regex.equals("false")) {
48 | regex = "";
49 | } else {
50 | regex = "--regex";
51 | }
52 | System.out.println(":::: Config PATH: "+ConfigData.pythonPath+ "- "+ ConfigData.trufflehogPath);
53 | String[] cmd = {
54 | // "/usr/local/bin/python2.7",
55 | // "/usr/bin/python2.7",
56 | // "/Users/n0r00ij/Downloads/truffleHog-dev/truffleHog/truffleHog/truffleHog.py",
57 | ConfigData.pythonPath, ConfigData.trufflehogPath, regex, "--cleanup", "--entropy=" + entropy, "--json",
58 | link };
59 |
60 | if (!DaoUtil.searchDuplicateByUrl(link)) {
61 | Process p = Runtime.getRuntime().exec(cmd);
62 |
63 | // p.waitFor();
64 | BufferedReader bri = new BufferedReader(new InputStreamReader(p.getInputStream()));
65 | BufferedReader bre = new BufferedReader(new InputStreamReader(p.getErrorStream()));
66 | String line;
67 | while ((line = bri.readLine()) != null) {
68 | // System.out.println(line);
69 | // System.out.println();
70 | secrets.addAll(extractRegexMatches(line, regexForSecret));
71 |
72 | }
73 | bri.close();
74 | while ((line = bre.readLine()) != null) {
75 | // System.out.println(line);
76 | secrets.addAll(extractRegexMatches(line, regexForSecret));
77 |
78 | }
79 | bre.close();
80 | // Important decide if this is needed or remove it
81 | p.waitFor(45, TimeUnit.MINUTES);
82 |
83 | p.destroyForcibly();
84 |
85 | // p.destroy();
86 |
87 | }
88 |
89 | Boolean is_Valid = false;
90 | if (secrets.size() > 0) {
91 | Set temp = new HashSet();
92 | temp.add(link);
93 | EmailUtility.sendEmailUsingGmail(profile, temp, searchTerm);
94 | is_Valid = true;
95 | }
96 |
97 | if (!DaoUtil.searchDuplicateByUrl(link)) {
98 |
99 | DbUtil.addNewEntry(secrets, link, profile, is_Valid);
100 |
101 | }
102 | // System.out.println("*********Done");
103 |
104 | return secrets;
105 | }
106 |
107 | public static Set extractRegexMatches(String line, String regex) {
108 | final Set matchSet = new HashSet();
109 | Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
110 | Matcher matcher = pattern.matcher(line);
111 | while (matcher.find()) {
112 | final SearchItem searchItem = new SearchItem();
113 | searchItem.setSearchItem(matcher.group(1));
114 | matchSet.add(searchItem);
115 | }
116 | return matchSet;
117 | }
118 |
119 | @Override
120 | public void run() {
121 | // TODO Auto-generated method stub
122 | try {
123 | getSecrets();
124 | } catch (IOException | InterruptedException e) {
125 | // TODO Auto-generated catch block
126 | e.printStackTrace();
127 | }
128 |
129 | }
130 |
131 | /**
132 | * public static void main(String args[]) { try {
133 | * System.out.println(getSecrets("https://github.com/cogdog/tweets.git")); }
134 | * catch (IOException e) { // TODO Auto-generated catch block
135 | * e.printStackTrace(); } catch (InterruptedException e) { // TODO
136 | * Auto-generated catch block e.printStackTrace(); } }
137 | **/
138 |
139 | }
140 |
--------------------------------------------------------------------------------
/rts-impl/src/main/resources/META-INF/services/org.rts.base.ScrapperProfile:
--------------------------------------------------------------------------------
1 | org.rts.rtsprofile.TwitterProfile
2 | org.rts.rtsprofile.PastieProfile
3 | org.rts.rtsprofile.GithubProfile
4 | org.rts.rtsprofile.RedditProfile
5 | org.rts.rtsprofile.GistGithubProfile
6 | org.rts.rtsprofile.IdeonecomProfile
7 | org.rts.rtsprofile.KpastenetProfile
8 | org.rts.rtsprofile.PastebincaProfile
9 | org.rts.rtsprofile.PastebinfrProfile
10 | org.rts.rtsprofile.SlexyOrgProfile
11 | org.rts.rtsprofile.CodepadProfile
12 | org.rts.rtsprofile.Pasteorgru
13 | org.rts.rtsprofile.Lpaste
14 | org.rts.rtsprofile.Dumpz
15 | org.rts.rtsprofile.Snipplr
--------------------------------------------------------------------------------
/rts-impl/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Root logger option
2 | log4j.rootLogger=error, stdout, file
3 |
4 | # Redirect log messages to console
5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
6 | log4j.appender.stdout.Target=System.out
7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
8 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
9 |
10 | # Redirect log messages to a log file, support file rolling.
11 | log4j.appender.file=org.apache.log4j.RollingFileAppender
12 | log4j.appender.file.File=RTS.log
13 | log4j.appender.file.MaxFileSize=5MB
14 | log4j.appender.file.MaxBackupIndex=10
15 | log4j.appender.file.layout=org.apache.log4j.PatternLayout
16 | log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
--------------------------------------------------------------------------------
/rts-impl/src/test/java/org/rts/impl/AppTest.java:
--------------------------------------------------------------------------------
1 | package org.rts.impl;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigourous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/scrapper_config/consumer.properties:
--------------------------------------------------------------------------------
1 | bootstrap.servers=localhost:9092
2 | group.id=test
3 | enable.auto.commit=true
4 | auto.commit.interval.ms=1000
5 | session.timeout.ms=300000
6 | request.timeout.ms=500000
7 | auto.offset.reset=earliest
8 | key.deserializer=org.apache.kafka.common.serialization.StringDeserializer
9 | value.deserializer=org.kafkaparser.deseralize.DataDeserializer
10 | # These buffer sizes seem to be needed to avoid consumer switching to
11 | # a mode where it processes one bufferful every 5 seconds with multiple
12 | # timeouts along the way. No idea why this happens.
13 | fetch.min.bytes=50000
14 | receive.buffer.bytes=262144
15 | max.partition.fetch.bytes=2097152
--------------------------------------------------------------------------------
/scrapper_config/email.properties:
--------------------------------------------------------------------------------
1 | from-email = invited.tomail@gmail.com
2 | to-email = naveen.rudra02@gmail.com
3 | # SECURITY: a real password is committed here and is now public; rotate it and load it from an environment variable or secrets store instead.
3 | password = Padma-18
4 | mail.smtp.auth= true
5 | mail.smtp.starttls.enable =true
6 | mail.smtp.host = smtp.gmail.com
7 | mail.smtp.port = 587
8 |
--------------------------------------------------------------------------------
/scrapper_config/global.properties:
--------------------------------------------------------------------------------
1 | pythonpath=/usr/bin/python2.7
2 | trufflehogpath=/home/n0r00ij/truffleHog/truffleHog/truffleHog.py
3 | #pythonpath=/usr/local/bin/python2.7
4 | #trufflehogpath=/Users/n0r00ij/Downloads/truffleHog/truffleHog/truffleHog.py
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/scrapper_config/look for ssrf via host header:
--------------------------------------------------------------------------------
1 | look for ssrf via host header
2 |
3 | Verbose error message:
4 | 1. Add a "Debug=true" directive at the top of the file that generated the error. Example:
5 |
6 | <%@ Page Language="C#" Debug="true" %>
7 |
8 | or:
9 |
10 | 2) Add the following section to the configuration file of your application:
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | Forceful browsing:
19 | https://suppliersourcing.qa.walmart.com/odc_selection.aspx
20 |
21 | Viewstate can be decrypted
--------------------------------------------------------------------------------
/scrapper_config/producer.properties:
--------------------------------------------------------------------------------
1 | bootstrap.servers=localhost:9092
2 | acks=all
3 | retries=0
4 | batch.size=16384
5 | auto.commit.interval.ms=1000
6 | linger.ms=0
7 | key.serializer=org.apache.kafka.common.serialization.StringSerializer
8 | value.serializer=org.kafkaparser.serialize.DataSerializer
9 | block.on.buffer.full=true
--------------------------------------------------------------------------------
/scrapper_config/proxy.properties:
--------------------------------------------------------------------------------
1 | proxy_ip=web-proxy.ind.hp.com
2 | #proxy_ip=127.0.0.1
3 | proxy_port=8080
--------------------------------------------------------------------------------
/scraptool/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/scraptool/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/scraptool/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | scraptool
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.jdt.core.javanature
21 | org.eclipse.m2e.core.maven2Nature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/scraptool/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/test/java=UTF-8
4 | encoding/=UTF-8
5 |
--------------------------------------------------------------------------------
/scraptool/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/scraptool/RTS.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/scraptool/RTS.db
--------------------------------------------------------------------------------
/scraptool/RTS.log:
--------------------------------------------------------------------------------
1 | 2019-08-15 19:52:43 ERROR ScrapperTool:90 - Something wrong happened in scrappers
2 | java.lang.NullPointerException
3 | at org.rts.impl.PastieImpl.initScrapper(PastieImpl.java:45)
4 | at org.rts.rtsprofile.CodepadProfile.newInstance(CodepadProfile.java:19)
5 | at org.rts.base.profileregistry.ScrapperProfileRegistry.newScrapProfile(ScrapperProfileRegistry.java:30)
6 | at org.rts.base.ScrapperImpl.initialize(ScrapperImpl.java:51)
7 | at org.rts.base.ScrapperImpl.initialize(ScrapperImpl.java:38)
8 | at org.scraptool.ScrapperTool.initializeScrappers(ScrapperTool.java:40)
9 | at org.scraptool.ScrapperTool.startScrappers(ScrapperTool.java:80)
10 | at org.scraptool.ScrapperTool.main(ScrapperTool.java:127)
11 | 2019-08-15 19:54:17 ERROR ScrapperTool:90 - Something wrong happened in scrappers
12 | java.lang.NullPointerException
13 | at org.rts.impl.PastieImpl.initScrapper(PastieImpl.java:45)
14 | at org.rts.rtsprofile.CodepadProfile.newInstance(CodepadProfile.java:19)
15 | at org.rts.base.profileregistry.ScrapperProfileRegistry.newScrapProfile(ScrapperProfileRegistry.java:30)
16 | at org.rts.base.ScrapperImpl.initialize(ScrapperImpl.java:51)
17 | at org.rts.base.ScrapperImpl.initialize(ScrapperImpl.java:38)
18 | at org.scraptool.ScrapperTool.initializeScrappers(ScrapperTool.java:40)
19 | at org.scraptool.ScrapperTool.startScrappers(ScrapperTool.java:80)
20 | at org.scraptool.ScrapperTool.main(ScrapperTool.java:127)
21 |
--------------------------------------------------------------------------------
/scraptool/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.rts.scrap
7 | rts
8 | 1.0-SNAPSHOT
9 |
10 | com.rts.scrap
11 | scraptool
12 | 1.0-SNAPSHOT
13 | scraptool
14 | http://maven.apache.org
15 |
16 | UTF-8
17 |
18 |
19 |
20 | log4j
21 | log4j
22 | 1.2.17
23 |
24 |
25 | commons-cli
26 | commons-cli
27 | 1.4
28 |
29 |
30 | com.google.guava
31 | guava
32 | 18.0
33 |
34 |
35 | com.rts.scrap
36 | rts-impl
37 | 1.0-SNAPSHOT
38 |
39 |
40 | junit
41 | junit
42 | 3.8.1
43 | test
44 |
45 |
46 |
47 |
60 |
61 |
62 |
63 |
64 |
65 |
66 | org.apache.maven.plugins
67 | maven-shade-plugin
68 | 2.4
69 |
70 |
71 |
72 | package
73 |
74 | shade
75 |
76 |
77 |
78 |
79 | *:*
80 |
81 | META-INF/*.SF
82 | META-INF/*.DSA
83 | META-INF/*.RSA
84 |
85 |
86 |
87 |
88 |
89 |
91 |
92 | org.scraptool.ScrapperTool
93 | 1.0
94 | true
95 |
96 |
97 |
98 |
99 | true
100 | standalone
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
--------------------------------------------------------------------------------
/scraptool/src/main/java/org/scraptool/ScrapperTool.java:
--------------------------------------------------------------------------------
1 | package org.scraptool;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.util.HashMap;
6 | import java.util.Map;
7 | import java.util.Map.Entry;
8 | import org.kafkaparser.base.NotificationConsumerGroup;
9 | import org.kafkaparser.base.Producer;
10 | import org.rts.base.Scrapper;
11 | import org.rts.base.ScrapperImpl;
12 | import org.kafkaparser.utilities.ConfigData;
13 |
14 | import org.apache.log4j.Logger;
15 | import org.apache.commons.cli.*;
16 |
17 | public class ScrapperTool {
18 |
19 | static ScrapperImpl scrapperimpl;
20 | static HashMap allThreads = new HashMap();
21 | final static Logger logger = Logger.getLogger(ScrapperTool.class);
22 | public static String configDirectory;
23 | public static String topicname;
24 | static File configDirectoryfile;
25 |
26 | public static void initializeScrappers(String configDirectory)
27 | {
28 | //proper error handling and null pointer exception is a must and code ahs to be written man
29 | //logger check for proeprties file befor ekicking off
30 | //check if internet connection is there,log and pause threads if no internet
31 | //logger check for inetrnet connnection
32 | //logger check for any otehr things like rate limiting 200ok issues and all
33 | //logger for email too
34 |
35 | System.out.println( "Initializing of scrapper has started!!" );
36 | logger.info("Initializing of scrapper has started!!");
37 | try{
38 | scrapperimpl= ScrapperImpl.getInstance();
39 | configDirectoryfile = new File(ConfigData.configDirectory);
40 | scrapperimpl.initialize(configDirectoryfile);
41 | Producer.initialize(configDirectoryfile);
42 |
43 | }
44 | catch (IOException e) {
45 | //logger.error("Iniltizlization of scrappers has failed", e);
46 | e.printStackTrace();
47 | }
48 | }
49 |
50 | public static void checkThreadsStatus() throws InterruptedException
51 | {
52 | while (true)
53 | {
54 | for (Map.Entry entry : allThreads.entrySet())
55 | {
56 | if(!entry.getValue().isAlive())
57 | {
58 | System.out.println("Restrating the thread: "+entry.getKey()+" The reason being it not alive.");
59 | allThreads.remove(entry.getKey());
60 | startThread(entry.getKey());
61 | logger.error(entry.getKey()+"Thread has been restarted succeafully");
62 | }
63 | }
64 | Thread.sleep(60000);
65 | }
66 | }
67 |
68 | public static void startThread(String threadname)
69 | {
70 | Scrapper profile=scrapperimpl.getScrapperMap().get(threadname);
71 | Thread profilethread=new Thread(profile);
72 | profilethread.start();
73 | allThreads.put(threadname, profilethread);
74 | }
75 |
76 | public static void startScrappers()
77 | {
78 |
79 | try {
80 | initializeScrappers(configDirectory);
81 | for (Entry entry : scrapperimpl.getScrapperMap().entrySet())
82 | {
83 | startThread(entry.getKey());
84 | }
85 | NotificationConsumerGroup newgroup;
86 | newgroup = new NotificationConsumerGroup(5, ConfigData.topicName,configDirectoryfile);
87 | newgroup.execute();
88 | //checkThreadsStatus();
89 | } catch (Exception e) {
90 | logger.error("Something wrong happened in scrappers", e);
91 | e.printStackTrace();
92 | }
93 |
94 |
95 | }
96 |
97 | public static void main(String args[])
98 | {
99 | Options options = new Options();
100 |
101 | Option input1 = new Option("c", "configDirectory", true, "configDirectory path");
102 | input1.setRequired(true);
103 | options.addOption(input1);
104 |
105 | Option input2 = new Option("t", "topicname", true, "topicname of kafka");
106 | input2.setRequired(true);
107 | options.addOption(input2);
108 |
109 | CommandLineParser parser = new DefaultParser();
110 | HelpFormatter formatter = new HelpFormatter();
111 | CommandLine cmd;
112 |
113 | try {
114 | cmd = parser.parse(options, args);
115 | } catch (ParseException e) {
116 | System.out.println(e.getMessage());
117 | formatter.printHelp("Scrapper", options);
118 | System.exit(1);
119 | return;
120 | }
121 |
122 | ConfigData.configDirectory=cmd.getOptionValue("configDirectory");
123 | ConfigData.topicName=cmd.getOptionValue("topicname");
124 | //configDirectory = cmd.getOptionValue("configDirectory");
125 | //topicname = cmd.getOptionValue("topicname");
126 |
127 | ScrapperTool.startScrappers();
128 | }
129 | }
130 |
--------------------------------------------------------------------------------
/scraptool/src/test/java/org/scraptool/AppTest.java:
--------------------------------------------------------------------------------
1 | package org.scraptool;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigourous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/script/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/script/.DS_Store
--------------------------------------------------------------------------------
/script/cleanup.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Tears down the local Kafka/Zookeeper stack and removes its logs.

# Delete the working topic first so the broker shuts down cleanly.
/opt/kafka/bin/kafka-topics.sh --zookeeper localhost:2181 --delete --topic test

sleep 2

# Stop the Kafka broker.
/opt/kafka/bin/kafka-server-stop.sh

sleep 5
# Stop Zookeeper last (Kafka depends on it).
/opt/kafka/bin/zookeeper-server-stop.sh

sleep 1

rm -rf /tmp/kafka-logs
rm -rf /opt/kafka/logs
/script/db_setup.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# One-shot MySQL bootstrap: creates the rttm database plus an admin user
# reachable both locally and remotely.
# SECURITY: the admin password is hard-coded below; change it and prefer a
# provisioning secret over committing credentials to the repository.
echo "create database rttm COLLATE 'utf8_unicode_ci';" |mysql -uroot
echo "CREATE USER 'admin'@'localhost' IDENTIFIED BY 'password';" |mysql -uroot
echo "GRANT ALL PRIVILEGES ON *.* TO 'admin'@'localhost' WITH GRANT OPTION;" |mysql -uroot
echo "CREATE USER 'admin'@'%' IDENTIFIED BY 'password';" |mysql -uroot
echo "GRANT ALL PRIVILEGES ON *.* TO 'admin'@'%' WITH GRANT OPTION;" |mysql -uroot
echo "SHOW GRANTS FOR admin;" |mysql -uroot
echo "FLUSH PRIVILEGES;" |mysql -uroot
--------------------------------------------------------------------------------
/script/initialize.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Boots the local stack in dependency order: zookeeper -> kafka -> topic -> mysql.
# delete.topic.enable=true is expected to be set via env var at docker run time.

# Start zookeeper as a background process.
/opt/kafka/bin/zookeeper-server-start.sh /opt/kafka/config/zookeeper.properties &

sleep 3

# Start the Kafka broker.
/opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/server.properties &

# Bug fix: this line was "seelp 4", which failed with "command not found"
# and skipped the wait before topic creation.
sleep 4

# Create the working topic.
/opt/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test


# Start the MySQL server.
service mysql start

git config --global user.email "test@example.com"


exit
28 |
--------------------------------------------------------------------------------
/sqlite-dataaccess/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/sqlite-dataaccess/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 |
--------------------------------------------------------------------------------
/sqlite-dataaccess/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | sqlite-dataaccess
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.jdt.core.javanature
21 | org.eclipse.m2e.core.maven2Nature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/sqlite-dataaccess/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/main/resources=UTF-8
4 | encoding//src/test/java=UTF-8
5 | encoding/=UTF-8
6 |
--------------------------------------------------------------------------------
/sqlite-dataaccess/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/sqlite-dataaccess/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | com.rts.scrap
7 | rts
8 | 1.0-SNAPSHOT
9 |
10 | com.rts.scrap
11 | sqlite-dataaccess
12 | 1.0-SNAPSHOT
13 | sqlite-dataaccess
14 | http://maven.apache.org
15 |
16 |
17 | UTF-8
18 |
19 |
20 |
21 |
22 | org.hibernate.javax.persistence
23 | hibernate-jpa-2.1-api
24 | 1.0.2.Final
25 |
26 |
27 |
28 | org.hibernate
29 | hibernate-entitymanager
30 | 5.4.4.Final
31 |
32 |
33 | mysql
34 | mysql-connector-java
35 | 8.0.14
36 |
37 |
38 | org.xerial
39 | sqlite-jdbc
40 | 3.6.17
41 | test
42 |
43 |
44 | org.slf4j
45 | slf4j-log4j12
46 | 1.6.4
47 |
48 |
49 |
50 | log4j
51 | log4j
52 | 1.2.17
53 |
54 |
55 | junit
56 | junit
57 | 4.9
58 | test
59 |
60 |
61 | org.xerial
62 | sqlite-jdbc
63 | 3.21.0
64 |
65 |
66 |
67 |
--------------------------------------------------------------------------------
/sqlite-dataaccess/src/main/java/org/sqlite/dataaccess/entity/Result.java:
--------------------------------------------------------------------------------
1 | package org.sqlite.dataaccess.entity;
2 |
3 | import java.io.Serializable;
4 | import java.util.HashSet;
5 | import java.util.Set;
6 |
7 | import javax.persistence.CascadeType;
8 | import javax.persistence.Column;
9 | import javax.persistence.Entity;
10 | import javax.persistence.FetchType;
11 | import javax.persistence.Id;
12 | import javax.persistence.JoinColumn;
13 | import javax.persistence.JoinTable;
14 | import javax.persistence.Lob;
15 | import javax.persistence.ManyToMany;
16 |
17 | /**
18 | *
19 | * @author Josue R G Junior josueribeiro.jr@gmail.com
20 | */
21 | @Entity
22 | public class Result implements Serializable {
23 |
24 | private static final long serialVersionUID = -7250234396452258822L;
25 |
26 | @Id
27 | @Column(name = "url",unique = true, updatable = false, nullable = false)
28 | private String url;
29 | private String time;
30 | private String searchedtext;
31 | @Column(length=1000000)
32 | @Lob
33 | @ManyToMany(fetch=FetchType.EAGER, cascade = {CascadeType.PERSIST, CascadeType.MERGE})
34 | @JoinTable(name="result_search_item", joinColumns = {
35 | @JoinColumn(name = "url", nullable = false)},
36 | inverseJoinColumns = { @JoinColumn(name = "id",
37 | nullable = false)}
38 | )
39 | private Set searchedItemSet = new HashSet();
40 | @Column(name = "is_valid")
41 | private Boolean isValid;
42 |
43 | public Boolean getIsValid() {
44 | return isValid;
45 | }
46 |
47 | public void setIsValid(Boolean isValid) {
48 | this.isValid = isValid;
49 | }
50 |
51 | public String getBotName() {
52 | return botName;
53 | }
54 |
55 | public void setBotName(String botName) {
56 | this.botName = botName;
57 | }
58 |
59 | private String botName;
60 |
61 | public String getUrl() {
62 | return url;
63 | }
64 |
65 | public void setUrl(String url) {
66 | this.url = url;
67 | }
68 |
69 | public String getTime() {
70 | return time;
71 | }
72 |
73 | public void setTime(String time) {
74 | this.time = time;
75 | }
76 |
77 | public String getSearchedtext() {
78 | return searchedtext;
79 | }
80 |
81 | public void setSearchedtext(String searchedtext) {
82 | this.searchedtext = searchedtext;
83 | }
84 |
85 | @Override
86 | public int hashCode() {
87 | final int prime = 31;
88 | int result = 1;
89 | result = prime * result + ((time == null) ? 0 : time.hashCode());
90 | result = prime * result + ((url == null) ? 0 : url.hashCode());
91 | return result;
92 | }
93 |
94 | @Override
95 | public boolean equals(Object obj) {
96 | if (this == obj)
97 | return true;
98 | if (obj == null)
99 | return false;
100 | if (getClass() != obj.getClass())
101 | return false;
102 | Result other = (Result) obj;
103 | if (time == null) {
104 | if (other.time != null)
105 | return false;
106 | } else if (!time.equals(other.time))
107 | return false;
108 | if (url == null) {
109 | if (other.url != null)
110 | return false;
111 | } else if (!url.equals(other.url))
112 | return false;
113 | return true;
114 | }
115 |
116 | // add one extra column from future perspective
117 | // add one extra column if it is false or true
118 | public Set getSearchedTerms() {
119 | return searchedItemSet;
120 | }
121 |
122 | public void setSearchedTerms(Set searchedItemSet) {
123 | this.searchedItemSet = searchedItemSet;
124 | }
125 |
126 | @Override
127 | public String toString() {
128 | return "Result [url=" + url + ", time=" + time + ", searchedtext=" + searchedtext + ", searchedItemSet="
129 | + searchedItemSet + ", isValid=" + isValid + ", botName=" + botName + "]";
130 | }
131 |
132 | }
--------------------------------------------------------------------------------
/sqlite-dataaccess/src/main/java/org/sqlite/dataaccess/entity/SearchItem.java:
--------------------------------------------------------------------------------
1 | package org.sqlite.dataaccess.entity;
2 |
3 | import java.io.Serializable;
4 | import java.util.HashSet;
5 | import java.util.Set;
6 |
7 | import javax.persistence.Column;
8 | import javax.persistence.Entity;
9 | import javax.persistence.FetchType;
10 | import javax.persistence.GeneratedValue;
11 | import javax.persistence.GenerationType;
12 | import javax.persistence.Id;
13 | import javax.persistence.ManyToMany;
14 | import javax.persistence.Table;
15 |
16 | /**
17 | *
18 | * @author Sunny Sharma sunnysharmagts@gmail.com
19 | */
20 | @Entity
21 | @Table(name = "search_item")
22 | public class SearchItem implements Serializable {
23 |
24 | @Id
25 | @GeneratedValue(strategy = GenerationType.IDENTITY)
26 | @Column(name = "id", unique = true, updatable=false, nullable = false)
27 | private Integer id;
28 |
29 | @Column(name = "search_term", unique = true, nullable = false)
30 | private String searchItem;
31 |
32 | @ManyToMany(fetch = FetchType.EAGER, mappedBy = "searchedItemSet")
33 | private Set resultSet = new HashSet();
34 |
35 | public Integer getId() {
36 | return id;
37 | }
38 |
39 | public void setId(final int id) {
40 | this.id = id;
41 | }
42 |
43 | public String getSearchItem() {
44 | return searchItem;
45 | }
46 |
47 | public void setSearchItem(String searchItem) {
48 | this.searchItem = searchItem;
49 | }
50 |
51 | public Set getResult() {
52 | return this.resultSet;
53 | }
54 |
55 | public void setResult(Set resultSet) {
56 | this.resultSet = resultSet;
57 | }
58 |
59 | public void addResult(Result result) {
60 | this.resultSet.add(result);
61 | result.getSearchedTerms().add(this);
62 | }
63 |
64 | public void removeResult(Result result) {
65 | this.resultSet.remove(result);
66 | result.getSearchedTerms().remove(this);
67 | }
68 |
69 | @Override
70 | public String toString() {
71 | return "SearchItem [id=" + id + ", searchItem=" + searchItem + "]";
72 | }
73 | }
--------------------------------------------------------------------------------
/sqlite-dataaccess/src/main/java/org/sqlite/dataaccess/util/DaoUtil.java:
--------------------------------------------------------------------------------
1 | package org.sqlite.dataaccess.util;
2 |
3 | import java.text.DateFormat;
4 | import java.text.SimpleDateFormat;
5 | import java.util.ArrayList;
6 | import java.util.Date;
7 | import java.util.HashSet;
8 | import java.util.Set;
9 |
10 | import javax.persistence.TypedQuery;
11 | import javax.transaction.Transactional;
12 |
13 | import org.sqlite.dataaccess.entity.Result;
14 | import org.sqlite.dataaccess.entity.SearchItem;
15 |
16 | public class DaoUtil {
17 |
18 | static
19 | {
20 | EMfactory.setUp();
21 | EMfactory.initEntityManager();
22 | }
23 |
24 | @Transactional
25 | public synchronized static void insert(Result data) {
26 | try {
27 | EMfactory.em.getTransaction().begin();
28 | EMfactory.em.persist(data);
29 | EMfactory.em.getTransaction().commit();
30 | } catch(final Exception e) {
31 | e.printStackTrace();
32 | } finally {
33 | //EMfactory.em.close();
34 | }
35 | }
36 |
37 | @Transactional
38 | public synchronized static void merge(Result data) {
39 | try {
40 | EMfactory.em.getTransaction().begin();
41 | EMfactory.em.merge(data);
42 | EMfactory.em.getTransaction().commit();
43 | } catch(final Exception e) {
44 | e.printStackTrace();
45 | } finally {
46 | //EMfactory.em.close();
47 | }
48 | }
49 |
50 | @Transactional
51 | public synchronized static boolean searchDuplicateByUrl(String url)
52 | {
53 | //System.out.println("In db url is : "+url);
54 |
55 | //TypedQuery query = EMfactory.em.createQuery(
56 | // "SELECT result FROM Result result where result.url='"+url+"'" , Result.class);
57 | TypedQuery query = EMfactory.em.createQuery(
58 | "SELECT result FROM Result result where result.url=:url" , Result.class);
59 | query.setParameter("url", url);
60 | ArrayList results = (ArrayList) query.getResultList();
61 |
62 | //System.out.println("query size :"+Integer.toString(results.size()));
63 | if(results.size()>0)
64 | {
65 | return true;
66 | }
67 | return false;
68 | }
69 |
70 | public static void main (String [] args)
71 | {
72 | // Result person = new Result();
73 | // ArrayList test=new ArrayList<>();
74 | // test.add("asd");
75 | // person.setSearchedTerms(test);
76 | // person.setSearchedtext("some lines up and down man");
77 | // person.setUrl("http://google.com4");
78 | // DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
79 | // Date date = new Date();
80 | // person.setTime(dateFormat.format(date).toString());
81 | System.out.println("Stated intializing*****************************************************************************");
82 | for (int i=0;i<3;i++)
83 | {
84 | if(searchDuplicateByUrl("http://google.com71"))
85 | {
86 | System.out.println(Integer.toString(i) +" *************************-----found");
87 | System.out.println(Integer.toString(i) +" *************************-----found");
88 | System.out.println(Integer.toString(i) +" *************************-----found");
89 | }
90 |
91 | else
92 | {
93 | System.out.println(Integer.toString(i) +" *************************-----not found");
94 | System.out.println(Integer.toString(i) +" *************************-----not found");
95 | System.out.println(Integer.toString(i) +" *************************-----not found");
96 |
97 | }
98 |
99 | Result person = new Result();
100 | Set test=new HashSet();
101 | final SearchItem searchItem = new SearchItem();
102 | searchItem.setSearchItem("asd");
103 | test.add(searchItem);
104 | person.setSearchedTerms(test);
105 | person.setSearchedtext("some lines up and down man");
106 | person.setUrl("http://google.com71");
107 | DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
108 | Date date = new Date();
109 | person.setTime(dateFormat.format(date).toString());
110 | DaoUtil.insert(person);
111 | Result person1 = new Result();
112 | ArrayList test1=new ArrayList<>();
113 | test1.add("asdq");
114 | person1.setSearchedTerms(test);
115 | person1.setSearchedtext("some lines up and down man");
116 | person1.setUrl("http://google.com712");
117 | DaoUtil.insert(person1);
118 |
119 |
120 | }
121 | //EMfactory.em.persist(person);
122 | //EMfactory.em.getTransaction().commit();
123 |
124 |
125 | }
126 | }
127 |
--------------------------------------------------------------------------------
/sqlite-dataaccess/src/main/java/org/sqlite/dataaccess/util/EMfactory.java:
--------------------------------------------------------------------------------
1 | package org.sqlite.dataaccess.util;
2 |
3 | import javax.persistence.Persistence;
4 | import javax.persistence.EntityManager;
5 | import javax.persistence.EntityManagerFactory;
6 |
7 | public class EMfactory {
8 |
9 | public static EntityManagerFactory emf;
10 | public static EntityManager em;
11 |
12 | public static void setUp() {
13 | emf = Persistence.createEntityManagerFactory("mysql-dataAccess");
14 | }
15 |
16 | public static void initEntityManager() {
17 | em = emf.createEntityManager();
18 |
19 |
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/sqlite-dataaccess/src/main/resources/META-INF/persistence.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | org.hibernate.ejb.HibernatePersistence
6 | org.sqlite.dataaccess.entity.Result
7 | org.sqlite.dataaccess.entity.SearchItem
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | org.hibernate.ejb.HibernatePersistence
20 | org.sqlite.dataaccess.entity.SearchItem
21 | org.sqlite.dataaccess.entity.Result
22 |
23 |
24 |
25 |
26 |
27 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/sqlite-dataaccess/src/main/resources/import.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/sqlite-dataaccess/src/main/resources/import.sql
--------------------------------------------------------------------------------
/sqlite-dataaccess/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
2 | log4j.appender.stdout.Target=System.out
3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
4 | log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
5 | log4j.rootLogger=all, stdout
6 | #log4j.logger.org.hibernate=all
7 | #log4j.logger.org.hibernate.type=all
8 | #log4j.logger.org.hibernate.tool.hbm2ddl=debug
--------------------------------------------------------------------------------
/sqlite-dataaccess/src/test/java/org/sqlite/dataaccess/AppTest.java:
--------------------------------------------------------------------------------
1 | package org.sqlite.dataaccess;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigourous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------