├── .DS_Store ├── .project ├── .settings ├── org.eclipse.core.resources.prefs └── org.eclipse.m2e.core.prefs ├── Dockerfile ├── JPA-Access ├── .classpath ├── .project ├── .settings │ ├── org.eclipse.core.resources.prefs │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── pom.xml ├── src │ ├── META-INF │ │ └── persistence.xml │ ├── main │ │ └── java │ │ │ ├── com │ │ │ └── rts │ │ │ │ └── mysql │ │ │ │ ├── dao │ │ │ │ └── Result.java │ │ │ │ └── util │ │ │ │ ├── DaoUtil.java │ │ │ │ └── EMfactory.java │ │ │ └── org │ │ │ └── JPA │ │ │ └── Access │ │ │ └── App.java │ └── test │ │ └── java │ │ └── org │ │ └── JPA │ │ └── Access │ │ └── AppTest.java └── target │ ├── classes │ ├── META-INF │ │ ├── MANIFEST.MF │ │ └── maven │ │ │ └── com.rts.scrap │ │ │ └── JPA-Access │ │ │ ├── pom.properties │ │ │ └── pom.xml │ ├── com │ │ └── rts │ │ │ └── mysql │ │ │ ├── dao │ │ │ └── Result.class │ │ │ └── util │ │ │ ├── DaoUtil.class │ │ │ └── EMfactory.class │ └── org │ │ └── JPA │ │ └── Access │ │ └── App.class │ └── test-classes │ └── org │ └── JPA │ └── Access │ └── AppTest.class ├── License ├── RTS.db ├── RTS.log ├── RTTM_Logo.png ├── Readme.md ├── architecture.png ├── docs ├── .DS_Store ├── Makefile ├── build │ ├── doctrees │ │ ├── apiKeySetUp.doctree │ │ ├── architecture.doctree │ │ ├── configuration.doctree │ │ ├── contents.doctree │ │ ├── contributors.doctree │ │ ├── debugging.doctree │ │ ├── environment.pickle │ │ ├── index.doctree │ │ ├── intro.doctree │ │ ├── license.doctree │ │ ├── roadmap.doctree │ │ └── setupGuide.doctree │ └── html │ │ ├── .buildinfo │ │ ├── _images │ │ ├── RTTM_Logo.png │ │ └── architecture.png │ │ ├── _sources │ │ ├── apiKeySetUp.md.txt │ │ ├── architecture.md.txt │ │ ├── configuration.md.txt │ │ ├── contents.rst.txt │ │ ├── contributors.md.txt │ │ ├── debugging.md.txt │ │ ├── index.rst.txt │ │ ├── intro.md.txt │ │ ├── license.md.txt │ │ ├── roadmap.md.txt │ │ └── setupGuide.md.txt │ │ ├── _static │ │ ├── RTTM_Logo.png │ │ ├── alabaster.css │ │ ├── architecture.png │ │ ├── basic.css │ │ ├── css │ │ │ ├── badge_only.css │ │ │ └── theme.css │ │ ├── custom.css │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── file.png │ │ ├── fonts │ │ │ ├── Inconsolata-Bold.ttf │ │ │ ├── Inconsolata-Regular.ttf │ │ │ ├── Inconsolata.ttf │ │ │ ├── Lato-Bold.ttf │ │ │ ├── Lato-Regular.ttf │ │ │ ├── Lato │ │ │ │ ├── lato-bold.eot │ │ │ │ ├── lato-bold.ttf │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ ├── lato-bolditalic.woff2 │ │ │ │ ├── lato-italic.eot │ │ │ │ ├── lato-italic.ttf │ │ │ │ ├── lato-italic.woff │ │ │ │ ├── lato-italic.woff2 │ │ │ │ ├── lato-regular.eot │ │ │ │ ├── lato-regular.ttf │ │ │ │ ├── lato-regular.woff │ │ │ │ └── lato-regular.woff2 │ │ │ ├── RobotoSlab-Bold.ttf │ │ │ ├── RobotoSlab-Regular.ttf │ │ │ ├── RobotoSlab │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ ├── roboto-slab-v7-regular.eot │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.svg │ │ │ ├── fontawesome-webfont.ttf │ │ │ ├── fontawesome-webfont.woff │ │ │ └── fontawesome-webfont.woff2 │ │ ├── jquery-3.4.1.js │ │ ├── jquery.js │ │ ├── js │ │ │ ├── modernizr.min.js │ │ │ └── theme.js │ │ ├── language_data.js │ │ ├── minus.png │ │ ├── 
plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── underscore-1.3.1.js │ │ └── underscore.js │ │ ├── apiKeySetUp.html │ │ ├── architecture.html │ │ ├── configuration.html │ │ ├── contents.html │ │ ├── contributors.html │ │ ├── debugging.html │ │ ├── genindex.html │ │ ├── index.html │ │ ├── intro.html │ │ ├── license.html │ │ ├── objects.inv │ │ ├── roadmap.html │ │ ├── search.html │ │ ├── searchindex.js │ │ └── setupGuide.html ├── make.bat └── source │ ├── .DS_Store │ ├── _static │ ├── RTTM_Logo.png │ └── architecture.png │ ├── apiKeySetUp.md │ ├── architecture.md │ ├── conf.py │ ├── configuration.md │ ├── contents.rst │ ├── contributors.md │ ├── debugging.md │ ├── index.rst │ ├── intro.md │ ├── license.md │ ├── roadmap.md │ └── setupGuide.md ├── kafka-parser ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.core.resources.prefs │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── html-mail-template.ftl ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── kafkaparser │ │ │ ├── base │ │ │ ├── NotificationConsumerGroup.java │ │ │ ├── NotificationConsumerThread.java │ │ │ └── Producer.java │ │ │ ├── deseralize │ │ │ └── DataDeserializer.java │ │ │ ├── pojo │ │ │ └── Data.java │ │ │ ├── serialize │ │ │ └── DataSerializer.java │ │ │ └── utilities │ │ │ ├── ConfigData.java │ │ │ ├── ConfigParams.java │ │ │ ├── DbUtil.java │ │ │ ├── EmailUtility.java │ │ │ ├── Git.java │ │ │ ├── HttpUtilities.java │ │ │ ├── PastieParseAndSearch.java │ │ │ ├── Search.java │ │ │ ├── SearchThread.java │ │ │ └── TruffleHog.java │ └── resources │ │ └── html-mail-template.ftl │ └── test │ └── java │ └── org │ └── kafka │ └── parser │ └── AppTest.java ├── pom.xml ├── rts-base ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.core.resources.prefs │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── pom.xml └── src │ ├── main │ └── java │ │ └── org │ │ └── rts │ │ └── base │ │ ├── Scrapper.java │ │ ├── ScrapperImpl.java │ │ ├── ScrapperProfile.java │ │ ├── exceptions │ │ └── ScrapperNotvalidException.java │ │ ├── profileregistry │ │ └── ScrapperProfileRegistry.java │ │ └── utilities │ │ └── PropertyUtilities.java │ └── test │ └── java │ └── org │ └── rts │ └── base │ └── AppTest.java ├── rts-impl ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.core.resources.prefs │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── pom.xml └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── rts │ │ │ ├── impl │ │ │ ├── GithubImpl.java │ │ │ ├── PastieImpl.java │ │ │ ├── RedditImpl.java │ │ │ └── TwitterImpl.java │ │ │ ├── rtsprofile │ │ │ ├── CodepadProfile.java │ │ │ ├── Dumpz.java │ │ │ ├── GistGithubProfile.java │ │ │ ├── GithubProfile.java │ │ │ ├── IdeonecomProfile.java │ │ │ ├── KpastenetProfile.java │ │ │ ├── Lpaste.java │ │ │ ├── PastebincaProfile.java │ │ │ ├── PastebinfrProfile.java │ │ │ ├── PastebinruProfile.java │ │ │ ├── Pasteorgru.java │ │ │ ├── PastieProfile.java │ │ │ ├── RedditProfile.java │ │ │ ├── SlexyOrgProfile.java │ │ │ ├── Snipplr.java │ │ │ └── TwitterProfile.java │ │ │ └── utilities │ │ │ ├── Difference.java │ │ │ ├── JsonParserForGithub.java │ │ │ ├── JsonParserForReddit.java │ │ │ └── TruffleHog.java │ └── resources │ │ ├── META-INF │ │ └── services │ │ │ └── org.rts.base.ScrapperProfile │ │ └── log4j.properties │ └── test │ └── java │ └── org │ └── rts │ └── impl │ └── AppTest.java ├── scrapper_config ├── consumer.properties ├── email.properties ├── 
global.properties ├── html-mail-template.ftl ├── look for ssrf via host header ├── producer.properties ├── proxy.properties ├── scanner-configuration.properties └── useragents-list.txt ├── scraptool ├── .classpath ├── .gitignore ├── .project ├── .settings │ ├── org.eclipse.core.resources.prefs │ ├── org.eclipse.jdt.core.prefs │ └── org.eclipse.m2e.core.prefs ├── RTS.db ├── RTS.log ├── pom.xml └── src │ ├── main │ └── java │ │ └── org │ │ └── scraptool │ │ └── ScrapperTool.java │ └── test │ └── java │ └── org │ └── scraptool │ └── AppTest.java ├── script ├── .DS_Store ├── cleanup.sh ├── db_setup.sh └── initialize.sh └── sqlite-dataaccess ├── .classpath ├── .gitignore ├── .project ├── .settings ├── org.eclipse.core.resources.prefs ├── org.eclipse.jdt.core.prefs └── org.eclipse.m2e.core.prefs ├── pom.xml └── src ├── main ├── java │ └── org │ │ └── sqlite │ │ └── dataaccess │ │ ├── entity │ │ ├── Result.java │ │ └── SearchItem.java │ │ └── util │ │ ├── DaoUtil.java │ │ ├── EMfactory.java │ │ └── SQLiteDialect.java └── resources │ ├── META-INF │ └── persistence.xml │ ├── import.sql │ └── log4j.properties └── test └── java └── org └── sqlite └── dataaccess └── AppTest.java /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/.DS_Store -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | rts 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.m2e.core.maven2Builder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.m2e.core.maven2Nature 16 | 17 | 18 | -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding/=UTF-8 3 | -------------------------------------------------------------------------------- /.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8 2 | 3 | ENV DEBIAN_FRONTEND noninteractive 4 | 5 | RUN apt-get update 6 | 7 | RUN apt install -y maven 8 | 9 | RUN apt-get install git 10 | 11 | RUN git config --global user.email "test@rttm.com" 12 | 13 | RUN apt-get install -y mysql-server 14 | 15 | RUN apt-get install -y \ 16 | zookeeper \ 17 | wget \ 18 | dnsutils \ 19 | vim \ 20 | && rm -rf /var/lib/apt/lists/* 21 | 22 | ENV KAFKA_VERSION 2.1.1 23 | ENV SCALA_VERSION 2.11 24 | RUN wget -q \ 25 | http://apache.mirrors.spacedump.net/kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz \ 26 | -O /tmp/kafka.tgz \ 27 | && tar xfz /tmp/kafka.tgz -C /opt \ 28 | && rm /tmp/kafka.tgz \ 29 | && mv /opt/kafka_${SCALA_VERSION}-${KAFKA_VERSION} /opt/kafka 30 | 31 | 32 | WORKDIR /opt/RTTM 33 | 34 | RUN git clone https://github.com/NaveenRudra/RTTM.git 35 | 36 | RUN git clone https://github.com/dxa4481/truffleHog.git 37 | 38 | RUN wget https://bootstrap.pypa.io/get-pip.py 39 | 40 | RUN python get-pip.py 41 | 42 | RUN rm get-pip.py 43 | 44 | 45 | WORKDIR /opt/RTTM/truffleHog 46 | 47 | RUN pip install -r 
requirements.txt 48 | 49 | RUN python setup.py install 50 | 51 | 52 | WORKDIR /opt/RTTM/RTTM 53 | 54 | RUN mvn install -D skipTests 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /JPA-Access/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /JPA-Access/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | JPA-Access 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /JPA-Access/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/test/java=UTF-8 4 | encoding/=UTF-8 5 | -------------------------------------------------------------------------------- /JPA-Access/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 3 | org.eclipse.jdt.core.compiler.compliance=1.8 4 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 5 | org.eclipse.jdt.core.compiler.release=disabled 6 | org.eclipse.jdt.core.compiler.source=1.8 7 | -------------------------------------------------------------------------------- /JPA-Access/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /JPA-Access/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.rts.scrap 7 | rts 8 | 1.0-SNAPSHOT 9 | 10 | com.rts.scrap 11 | JPA-Access 12 | 1.0-SNAPSHOT 13 | JPA-Access 14 | http://maven.apache.org 15 | 16 | UTF-8 17 | 18 | 19 | 20 | junit 21 | junit 22 | 3.8.1 23 | test 24 | 25 | 26 | javax.persistence 27 | persistence-api 28 | 1.0.2 29 | provided 30 | 31 | 32 | javax.transaction 33 | javax.transaction-api 34 | 1.2 35 | provided 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /JPA-Access/src/META-INF/persistence.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | de.vogella.jpa.simple.model.Todo 7 | 8 | 9 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /JPA-Access/src/main/java/com/rts/mysql/dao/Result.java: -------------------------------------------------------------------------------- 1 | package com.rts.mysql.dao; 2 | 3 | import java.io.Serializable; 4 | import java.util.ArrayList; 5 | 6 | import javax.persistence.Column; 7 | import javax.persistence.Entity; 8 | import javax.persistence.GeneratedValue; 9 | import javax.persistence.GenerationType; 10 | import javax.persistence.Id; 11 | 12 | 
/** 13 | * 14 | * @author Josue R G Junior josueribeiro.jr@gmail.com 15 | */ 16 | @Entity 17 | public class Result implements Serializable { 18 | 19 | private static final long serialVersionUID = -7250234396452258822L; 20 | 21 | @Id 22 | @Column(name = "id_scrapper") 23 | @GeneratedValue(strategy = GenerationType.AUTO) 24 | private Integer id; 25 | private String url; 26 | private String time; 27 | private String searchedtext; 28 | 29 | @ManytoMany(fetch = FetchType.LAZY, cascade = CascadeType.ALL) 30 | @JoinTable(name = "result_search_item", joinColumns = { 31 | @JoinColumn(name = "url", nullable = false, updatable = false) }, inverseJoinColumns = { 32 | @JoinColumn(name = "id", nullable = false, updatable = false) }) 33 | private Set searchedTerms; 34 | 35 | public String getBotName() { 36 | return botName; 37 | } 38 | 39 | public void setBotName(String botName) { 40 | this.botName = botName; 41 | } 42 | 43 | private String botName; 44 | 45 | // add one extra column from future perspective 46 | // add one extra column if it is false or true 47 | public ArrayList getSearchedTerms() { 48 | return searchedTerms; 49 | } 50 | 51 | public void setSearchedTerms(Set searchedTerms) { 52 | this.searchedTerms = searchedTerms; 53 | } 54 | 55 | public String getUrl() { 56 | return url; 57 | } 58 | 59 | public void setUrl(String url) { 60 | this.url = url; 61 | } 62 | 63 | public String getTime() { 64 | return time; 65 | } 66 | 67 | public void setTime(String time) { 68 | this.time = time; 69 | } 70 | 71 | public String getSearchedtext() { 72 | return searchedtext; 73 | } 74 | 75 | public void setSearchedtext(String searchedtext) { 76 | this.searchedtext = searchedtext; 77 | } 78 | 79 | public Integer getId() { 80 | return id; 81 | } 82 | 83 | public void setId(Integer id) { 84 | this.id = id; 85 | } 86 | 87 | } -------------------------------------------------------------------------------- /JPA-Access/src/main/java/com/rts/mysql/util/DaoUtil.java: -------------------------------------------------------------------------------- 1 | package com.rts.mysql.util; 2 | 3 | import java.text.DateFormat; 4 | import java.text.SimpleDateFormat; 5 | import java.util.ArrayList; 6 | import java.util.Date; 7 | import javax.transaction.Transactional; 8 | 9 | import com.rts.mysql.dao.Result; 10 | 11 | public class DaoUtil { 12 | 13 | static 14 | { 15 | EMfactory.setUp(); 16 | EMfactory.initEntityManager(); 17 | } 18 | 19 | @Transactional 20 | public synchronized static void insert(Result data) 21 | { 22 | EMfactory.em.getTransaction().begin(); 23 | EMfactory.em.persist(data); 24 | EMfactory.em.getTransaction().commit(); 25 | 26 | } 27 | 28 | @Transactional 29 | public synchronized static boolean searchDuplicateByUrl(String url) 30 | { 31 | //System.out.println("In db url is : "+url); 32 | TypedQuery query = EMfactory.em.createQuery( 33 | "SELECT result FROM Result result where result.url='"+url+"'" , Result.class); 34 | ArrayList results = (ArrayList) query.getResultList(); 35 | 36 | //System.out.println("query size :"+Integer.toString(results.size())); 37 | if(results.size()>0) 38 | { 39 | return true; 40 | } 41 | return false; 42 | } 43 | 44 | public static void main (String [] args) 45 | { 46 | // Result person = new Result(); 47 | // ArrayList test=new ArrayList<>(); 48 | // test.add("asd"); 49 | // person.setSearchedTerms(test); 50 | // person.setSearchedtext("some lines up and down man"); 51 | // person.setUrl("http://google.com4"); 52 | // DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); 53 
| // Date date = new Date(); 54 | // person.setTime(dateFormat.format(date).toString()); 55 | System.out.println("Stated intializing*****************************************************************************"); 56 | for (int i=0;i<3;i++) 57 | { 58 | if(searchDuplicateByUrl("http://google.com71")) 59 | { 60 | System.out.println(Integer.toString(i) +" *************************-----found"); 61 | System.out.println(Integer.toString(i) +" *************************-----found"); 62 | System.out.println(Integer.toString(i) +" *************************-----found"); 63 | } 64 | 65 | else 66 | { 67 | System.out.println(Integer.toString(i) +" *************************-----not found"); 68 | System.out.println(Integer.toString(i) +" *************************-----not found"); 69 | System.out.println(Integer.toString(i) +" *************************-----not found"); 70 | 71 | } 72 | 73 | Result person = new Result(); 74 | ArrayList test=new ArrayList<>(); 75 | test.add("asd"); 76 | person.setSearchedTerms(test); 77 | person.setSearchedtext("some lines up and down man"); 78 | person.setUrl("http://google.com71"); 79 | DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); 80 | Date date = new Date(); 81 | person.setTime(dateFormat.format(date).toString()); 82 | DaoUtil.insert(person); 83 | Result person1 = new Result(); 84 | ArrayList test1=new ArrayList<>(); 85 | test1.add("asdq"); 86 | person1.setSearchedTerms(test); 87 | person1.setSearchedtext("some lines up and down man"); 88 | person1.setUrl("http://google.com712"); 89 | DaoUtil.insert(person1); 90 | 91 | 92 | } 93 | //EMfactory.em.persist(person); 94 | //EMfactory.em.getTransaction().commit(); 95 | 96 | 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /JPA-Access/src/main/java/com/rts/mysql/util/EMfactory.java: -------------------------------------------------------------------------------- 1 | package com.rts.mysql.util; 2 | 3 | import javax.persistence.Persistence; 4 | import javax.persistence.EntityManager; 5 | import javax.persistence.EntityManagerFactory; 6 | 7 | public class EMfactory { 8 | 9 | public static EntityManagerFactory emf; 10 | public static EntityManager em; 11 | 12 | public static void setUp() { 13 | emf = Persistence.createEntityManagerFactory("sqlite-dataAccess"); 14 | } 15 | 16 | public static void initEntityManager() { 17 | em = emf.createEntityManager(); 18 | 19 | 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /JPA-Access/src/main/java/org/JPA/Access/App.java: -------------------------------------------------------------------------------- 1 | package org.JPA.Access; 2 | 3 | /** 4 | * Hello world! 5 | * 6 | */ 7 | public class App 8 | { 9 | public static void main( String[] args ) 10 | { 11 | System.out.println( "Hello World!" ); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /JPA-Access/src/test/java/org/JPA/Access/AppTest.java: -------------------------------------------------------------------------------- 1 | package org.JPA.Access; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 
9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /JPA-Access/target/classes/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Built-By: n0r00ij 3 | Build-Jdk: 11.0.2 4 | Created-By: Maven Integration for Eclipse 5 | 6 | -------------------------------------------------------------------------------- /JPA-Access/target/classes/META-INF/maven/com.rts.scrap/JPA-Access/pom.properties: -------------------------------------------------------------------------------- 1 | #Generated by Maven Integration for Eclipse 2 | #Wed Aug 14 20:21:01 IST 2019 3 | m2e.projectLocation=/Users/n0r00ij/Documents/GitHub/RTS/JPA-Access 4 | m2e.projectName=JPA-Access 5 | groupId=com.rts.scrap 6 | artifactId=JPA-Access 7 | version=1.0-SNAPSHOT 8 | -------------------------------------------------------------------------------- /JPA-Access/target/classes/META-INF/maven/com.rts.scrap/JPA-Access/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.rts.scrap 7 | rts 8 | 1.0-SNAPSHOT 9 | 10 | com.rts.scrap 11 | JPA-Access 12 | 1.0-SNAPSHOT 13 | JPA-Access 14 | http://maven.apache.org 15 | 16 | UTF-8 17 | 18 | 19 | 20 | junit 21 | junit 22 | 3.8.1 23 | test 24 | 25 | 26 | javax.persistence 27 | persistence-api 28 | 1.0.2 29 | provided 30 | 31 | 32 | javax.transaction 33 | javax.transaction-api 34 | 1.2 35 | provided 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /JPA-Access/target/classes/com/rts/mysql/dao/Result.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/JPA-Access/target/classes/com/rts/mysql/dao/Result.class -------------------------------------------------------------------------------- /JPA-Access/target/classes/com/rts/mysql/util/DaoUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/JPA-Access/target/classes/com/rts/mysql/util/DaoUtil.class -------------------------------------------------------------------------------- /JPA-Access/target/classes/com/rts/mysql/util/EMfactory.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/JPA-Access/target/classes/com/rts/mysql/util/EMfactory.class -------------------------------------------------------------------------------- /JPA-Access/target/classes/org/JPA/Access/App.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/JPA-Access/target/classes/org/JPA/Access/App.class 
-------------------------------------------------------------------------------- /JPA-Access/target/test-classes/org/JPA/Access/AppTest.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/JPA-Access/target/test-classes/org/JPA/Access/AppTest.class -------------------------------------------------------------------------------- /RTS.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/RTS.db -------------------------------------------------------------------------------- /RTS.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/RTS.log -------------------------------------------------------------------------------- /RTTM_Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/RTTM_Logo.png -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 5 |
6 |

Real Time Threat Monitoring Tool V2.0

7 | 8 |
9 | 10 | Monitoring your company's possible threats on the Internet is an impossible task to achieve manually, so many threats go unnoticed until they go viral in public, causing monetary and reputational damage. This is where RTTM comes into action. RTTM (Real Time Threat Monitoring Tool) is a tool developed to scrape all pastie sites, GitHub, Reddit, etc. in real time and identify occurrences of the configured search terms. On a match, an email alert is triggered, allowing the company to react to code leakage, tweeted hacks, and similar threats, and to harden itself against an attack before it goes viral. 12 | 13 | Over the past 2 years the tool has evolved beyond simple search. Artificial intelligence has been implemented to perform better, context-based searches, and regexes are supported where needed. The behaviour is therefore close to a human analyst's and false positives are reduced. 14 | 15 | The best part of the tool is that an alert email is sent in less than 60 seconds from the time a threat hits the internet, allowing a response in real time. 16 | 17 | 18 | 19 | The same tool in a malicious user's hands can be used offensively to get updates on the latest hacks, code leaks, etc. 20 | 21 | The list of sites monitored is: 22 |
  • Non-Pastie Sites
    • Twitter
    • Reddit
    • Github

  • Pastie Sites
    • Pastebin.com
    • Codepad.org
    • Dumpz.org
    • Snipplr.com
    • Paste.org.ru
    • Gist.github.com
    • Pastebin.ca
    • Kpaste.net
    • Slexy.org
    • Ideone.com
    • Pastebin.fr
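To make the search-term idea above concrete, here is a minimal, hypothetical sketch of the kind of check that runs against scraped content. The class name, the example terms, and the regex are illustrative assumptions only, not RTTM's actual API.

```java
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

// Illustrative sketch only: matching scraped text against configured terms and a regex.
public class SearchTermMatcherSketch {

    // Plain search terms, of the kind configured comma-separated in scanner-configuration.properties.
    private static final List<String> TERMS = Arrays.asList("example-corp", "internal.example.com");

    // Optional regex patterns, e.g. something that looks like a leaked API key.
    private static final List<Pattern> PATTERNS = Arrays.asList(
            Pattern.compile("(?i)api[_-]?key\\s*[:=]\\s*\\w{16,}"));

    public static boolean matches(String scrapedText) {
        String lower = scrapedText.toLowerCase();
        for (String term : TERMS) {
            if (lower.contains(term.toLowerCase())) {
                return true;                      // plain term found -> alert
            }
        }
        for (Pattern pattern : PATTERNS) {
            if (pattern.matcher(scrapedText).find()) {
                return true;                      // regex pattern found -> alert
            }
        }
        return false;                             // nothing matched, no alert
    }

    public static void main(String[] args) {
        System.out.println(matches("leaked api_key = 0123456789abcdef0123")); // prints: true
    }
}
```

In the real tool, a positive match of this kind is what ends up in the SQLite DB and triggers the alert email.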
44 | 45 | 46 | 47 |

Architecture:

48 | 49 | 50 |

How it works?

51 | Once the tool is started, the engine kicks off and runs forever. The main input for this engine is the configuration file. Based on the configuration data, the engine probes Twitter/GitHub/Reddit for the matches configured in the configuration file. When a match is found, the Twitter/GitHub/Reddit link is pushed to the SQLite DB and an email alert is triggered. 52 | 53 | For pastie sites the logic is different, because they support neither search nor streaming APIs. Instead, the link of every new pastie made by any user is fetched and pushed to Kafka. Every new link added to Kafka is then picked up and searched for the matches configured in the configuration file. When a match is found, the pastie link is pushed to the SQLite DB and an email alert is triggered. 54 | 55 | Over the past 2 years the tool has evolved beyond simple search. Artificial intelligence has been implemented to perform better, context-based searches, and regexes are supported where needed. The behaviour is therefore close to a human analyst's and false positives are reduced. 56 | 57 |
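As a rough illustration of the pastie flow described above, here is a minimal, hypothetical consumer sketch. The broker address, the topic name `pastie-links`, the example search term, and the helper method are assumptions made for this example only; the tool's actual consumer code lives in the kafka-parser module.

```java
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

// Illustrative sketch only: consumes pastie links from Kafka and checks each one for a search term.
public class PastieLinkConsumerSketch {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");   // assumed local broker
        props.put("group.id", "rttm-sketch");                // assumed consumer group
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("pastie-links")); // hypothetical topic name
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
                for (ConsumerRecord<String, String> record : records) {
                    String url = record.value();              // link of a newly created pastie
                    try {
                        String body = fetch(url);             // download the pastie text
                        if (body.toLowerCase().contains("example-corp")) { // stand-in for configured terms
                            // The real tool stores the match in SQLite and sends an email alert here.
                            System.out.println("ALERT: match found at " + url);
                        }
                    } catch (IOException e) {
                        System.err.println("Could not fetch " + url + ": " + e.getMessage());
                    }
                }
            }
        }
    }

    // Tiny helper that reads a URL's body as text.
    private static String fetch(String url) throws IOException {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new URL(url).openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                sb.append(line).append('\n');
            }
        }
        return sb.toString();
    }
}
```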

Detailed Tool Documentation:

58 | https://real-time-threat-monitoring.readthedocs.io/en/latest/ 59 | 60 | 61 |

Developers:

62 | 63 | Authors: 64 |
  • Naveen Rudrappa
67 | 68 | Contributors: 69 |
  • Sunny Sharma
  • Murali Segu
73 | 74 | -------------------------------------------------------------------------------- /architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/architecture.png -------------------------------------------------------------------------------- /docs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/.DS_Store -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/build/doctrees/apiKeySetUp.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/apiKeySetUp.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/architecture.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/architecture.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/configuration.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/configuration.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/contents.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/contents.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/contributors.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/contributors.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/debugging.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/debugging.doctree 
-------------------------------------------------------------------------------- /docs/build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/intro.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/intro.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/license.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/license.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/roadmap.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/roadmap.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/setupGuide.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/doctrees/setupGuide.doctree -------------------------------------------------------------------------------- /docs/build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: b0c4f5b42daa0bbaa7b8887f888115eb 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/build/html/_images/RTTM_Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_images/RTTM_Logo.png -------------------------------------------------------------------------------- /docs/build/html/_images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_images/architecture.png -------------------------------------------------------------------------------- /docs/build/html/_sources/apiKeySetUp.md.txt: -------------------------------------------------------------------------------- 1 | API Key Setup 2 | ============= 3 | 4 | Twitter API Key: 5 | ---------------- 6 | * Go to https://dev.twitter.com/apps/new and log in, if necessary 7 | 8 | * Enter your Application Name, Description and your website address. 
You can leave the callback URL empty. 9 | 10 | * Accept the TOS, and solve the CAPTCHA. 11 | 12 | * Submit the form by clicking the Create your Twitter Application 13 | 14 | * Copy the consumer key (API key) and consumer secret from the screen into your application 15 | 16 | Github API Key: 17 | --------------- 18 | 19 | Refer link https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line for detailed information. 20 | 21 | Pastebin.com 22 | ------------- 23 | In case of pastebin.com it is not neccessary to generate apiKey but you need to whitelist your IP. For this you have to pay and get your IP whitelisted. 24 | 25 | Refer https://pastebin.com/doc_scraping_api for detailed information. 26 | -------------------------------------------------------------------------------- /docs/build/html/_sources/architecture.md.txt: -------------------------------------------------------------------------------- 1 | Architecture 2 | ============= 3 | 4 | .. image:: _static/architecture.png 5 | :width: 800 6 | :alt: Alternative text 7 | 8 | The architectural diagram of the tool is as above. 9 | 10 | How it works 11 | ----------------- 12 | 13 | Once the tool is started , engine gets kicked off and it runs forever. The main input for this engine is the configuration file. Based on the configuration file data, engine goes ahead and probes twitter/github/reddit for matches configured in configuration file. Upon a match is found, the link of twitter/github/reddit pushed to sqlite DB and an email alert is triggered. 14 | 15 | In case of pastie sites the logic is different. The reason being they do not support search nor streaming api's. Hence any new pastie made by any user, the link is fetched and pushed to kafka. From kafka any new link added is picked up and searched for matches configured in configuration file. Upon a match is found, the link of pastie site is pushed to sqlite DB and an email alert is triggered. -------------------------------------------------------------------------------- /docs/build/html/_sources/configuration.md.txt: -------------------------------------------------------------------------------- 1 | Configuration 2 | ============= 3 | 4 | Before using this tool is is neccessary to understand the properties file present in scrapper_config directory. 5 | 6 | consumer.properties 7 | ------------------------ 8 | Holds all the neccessary config data needed for consumer of Kafka (Refer apache Kafka guide for more information). The values present here are default options and does nto require any changes 9 | 10 | producer.properties 11 | ------------------------ 12 | Holds all the neccessary config data needed for Producer (Refer apache Kafka guide for more information).The values present here are default options and does nto require any changes 13 | 14 | email.properties 15 | ------------------------ 16 | Holds all the configuration data to send email. 17 | 18 | scanner-configuration.properties 19 | ------------------------------------- 20 | This is the core configuration file. Update all the config for enabling search on twitter/github(To get tokens and key refer respective sites). 21 | 22 | For pastie sites and reddit there is no need for any changes in config. 23 | 24 | *Note:However in all cases make sure to change "searchterms" to values of our choice to search. 
If there are multiple search terms then add them seperate by comma like the example data provided in config file.* 25 | 26 | **Understanding more about scanner-configuration.properties file.** 27 | 28 | For any pastie site configuration is as below: 29 | 30 | *Note:leave the pastie sites configuration as is and just change the search terms as requried by the organization. This will do good.* 31 | 32 | * scrapper.(pastie name).profile=(Pastie profile name) 33 | 34 | * scrapper.(pastie name).homeurl=(URL from where pastie ids a extracted) 35 | 36 | * scrapper.(pastie name).regex=(Regex to fetch pastie ids) 37 | 38 | * scrapper.(pastie name).downloadurl= (URL to get information about each apstie) 39 | 40 | * scrapper.(pastie name).searchterms=(Mention terms to be searched seperated by comma) 41 | 42 | * scrapper.(pastie name).timetosleep=(Time for which pastie thread will sleep before fetching pastie ids again) 43 | 44 | For github search configuration is as below: 45 | 46 | * scrapper.github.profile=Github 47 | 48 | * scrapper.github.baseurl=https://api.github.com/search/code?q={searchTerm}&sort=indexed&order=asc 49 | 50 | * scrapper.github.access_token=(Get your own github access token) 51 | 52 | * scrapper.github.searchterms=(Mention terms to be searched seperated by comma) 53 | 54 | * scrapper.github.timetosleep=(Time for which github thred should sleep before searching again) 55 | 56 | 57 | For reditt search configuration is as below: 58 | * scrapper.reddit.profile=Reddit 59 | 60 | * scrapper.reddit.baseurl=https://www.reddit.com/search.json?q={searchterm} 61 | 62 | * scrapper.reddit.searchterms=(Mention terms to be searched seperated by comma) 63 | 64 | * scrapper.reddit.timetosleep=(Time for which github thred should sleep before searching again) 65 | 66 | 67 | For Twitter search configuration is as below: 68 | * scrapper.twitter.apikey=test 69 | 70 | * scrapper.twitter.profile=Twitter 71 | 72 | * scrapper.twitter.searchterms=(Mention terms to be searched seperated by comma) 73 | 74 | * scrapper.twitter.consumerKey=(Get your own consumer key) 75 | 76 | * scrapper.twitter.consumerSecret=(Get your own consumerSecret) 77 | 78 | * scrapper.twitter.accessToken=(Get your own accessToken) 79 | 80 | * scrapper.twitter.accessTokenSecret=(Get your own accessTokenSecret) 81 | -------------------------------------------------------------------------------- /docs/build/html/_sources/contents.rst.txt: -------------------------------------------------------------------------------- 1 | .. Real Time Threat Monitoring Tool documentation master file, created by 2 | sphinx-quickstart on Sun Nov 10 06:40:56 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Real Time Threat Monitoring Tool's documentation! 7 | ============================================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | intro 14 | architecture 15 | configuration 16 | apiKeySetUp 17 | setupGuide 18 | debugging 19 | roadmap 20 | contributors 21 | license 22 | -------------------------------------------------------------------------------- /docs/build/html/_sources/contributors.md.txt: -------------------------------------------------------------------------------- 1 | Contributors 2 | ============ 3 | 4 | Well, lets accept the fact that nothing goes well without contributors. Here is the list of people who have helped (`@rttmscrapper `_) grow in its first phase. 
5 | 6 | Author: 7 | ------------------------ 8 | 9 | Folks who took out time from busy schedule and got their hands dirty with the code: 10 | 11 | * Naveen Rudrappa 12 | 13 | Contributors: 14 | ------------------------ 15 | * Murali Krishna Segu 16 | 17 | * Sunny Sharma 18 | 19 | Mentors: 20 | ------------- 21 | 22 | Chaps who were generous enough to give feedback and suggest changes: 23 | 24 | * Murali Krishna Segu 25 | -------------------------------------------------------------------------------- /docs/build/html/_sources/debugging.md.txt: -------------------------------------------------------------------------------- 1 | Debugging 2 | ========= 3 | 4 | Follow below steps in case you find issues while working of tool. 5 | 6 | * Whenever you face issue with tool. Look into logs what is displayed. Sometime apiKey will expired you may have to regenerate it. 7 | 8 | * Run *clean.sh* in scrip folder. 9 | 10 | * Now run *intialize.sh* 11 | 12 | * Now run *java -jar scraptool/target/scraptool-1.0-SNAPSHOT-standalone.jar -t test -c /home/n0r00ij/RTS/scrapper_config/* -------------------------------------------------------------------------------- /docs/build/html/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. Real Time Threat Monitoring Tool documentation master file, created by 2 | sphinx-quickstart on Sun Nov 10 06:40:56 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Real Time Threat Monitoring Tool's documentation! 7 | ============================================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | intro 14 | architecture 15 | configuration 16 | apiKeySetUp 17 | setupGuide 18 | debugging 19 | roadmap 20 | contributors 21 | license 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/build/html/_sources/intro.md.txt: -------------------------------------------------------------------------------- 1 | Intro 2 | ===== 3 | 4 | .. image:: _static/RTTM_Logo.png 5 | :scale: 30 % 6 | :align: right 7 | :class: intro-logo 8 | 9 | Why this tool? 10 | ------------------ 11 | Monitoring possible threats of your company on Internet is an impossible task to be achieved manually. Hence many threats of the company goes unnoticed until it becomes viral in public. Thus causing monetary/reputation damage. This is where RTTM comes into action. RTTM (Real Time Threat Monitoring Tool) is a tool developed to scrap all pasties,github,reddit..etc in real time to identify occurrence of search terms configured. Upon match an email will be triggered. Thus allowing company to react in case of leakage of code, any hacks tweeted..etc.. and harden themselves against an attack before it goes viral. 12 | 13 | Over the past 2 years the tool has evolved from simple search. Artificial intelligence has been implemented to perform better search based on context. If regex is needed even that is supported. Thus behaviour is close to human and reduces false positives. 14 | 15 | The best part of tool is that alert will be sent to email in less that 60 seconds from the time threat has made it to interent. Thus allowing response in real time to happen.. 16 | 17 | The same tool in malicious user hands can be used offensively to get update on any latest hacks, code leakage etc.. 
18 | 19 | List of sites which will be monitored are: 20 | 21 | Non-Pastie Sites: 22 | ------------------------ 23 | 24 | * Twitter 25 | 26 | * Reddit 27 | 28 | * Github 29 | 30 | 31 | Pastie Sites 32 | ------------------------ 33 | 34 | * Pastebin.com 35 | 36 | * Codepad.org 37 | 38 | * Dumpz.org 39 | 40 | * Snipplr.com 41 | 42 | * Paste.org.ru 43 | 44 | * Gist.github.com 45 | 46 | * Pastebin.ca 47 | 48 | * Kpaste.net 49 | 50 | * Slexy.org 51 | 52 | * Ideone.com 53 | 54 | * Pastebin.fr 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /docs/build/html/_sources/license.md.txt: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. -------------------------------------------------------------------------------- /docs/build/html/_sources/roadmap.md.txt: -------------------------------------------------------------------------------- 1 | RoadMap 2 | ======= 3 | 4 | We are having following ideas to include in upcoming versions: 5 | 6 | * Enhance artificial intelligence for search 7 | 8 | * Implement worker nodes for searching 9 | 10 | * Support google dork,linkedin,fark web etc.. 11 | 12 | * Support other DB's like postgress/oracle/Mongo etc.. 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/build/html/_sources/setupGuide.md.txt: -------------------------------------------------------------------------------- 1 | SetupGuide 2 | ========== 3 | 4 | Install via Docker 5 | --------------------------------------------------- 6 | 7 | * Install docker in your system 8 | 9 | * Download Dockerfile from https://github.com/NaveenRudra/RTTM 10 | 11 | * Change directory to RTTM 12 | 13 | * execute *docker build .* 14 | 15 | * Run *docker exec -it /bin/bash* 16 | 17 | * Now once in docker navigate to */opt/RTTM/script* 18 | 19 | * Run *intialize.sh* script. This will boot mysql server and starts kafka. 20 | 21 | * Run *db_setup.sh* this will created needed table. 
22 | 23 | * Now from */opt/RTTM* run command *java -jar scraptool/target/scraptool-1.0-SNAPSHOT-standalone.jar -t test -c /home/n0r00ij/RTS/scrapper_config/* 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/build/html/_static/RTTM_Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/RTTM_Logo.png -------------------------------------------------------------------------------- /docs/build/html/_static/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/architecture.png -------------------------------------------------------------------------------- /docs/build/html/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-weight:normal;font-style:normal;src:url("../fonts/fontawesome-webfont.eot");src:url("../fonts/fontawesome-webfont.eot?#iefix") format("embedded-opentype"),url("../fonts/fontawesome-webfont.woff") format("woff"),url("../fonts/fontawesome-webfont.ttf") format("truetype"),url("../fonts/fontawesome-webfont.svg#FontAwesome") format("svg")}.fa:before{display:inline-block;font-family:FontAwesome;font-style:normal;font-weight:normal;line-height:1;text-decoration:inherit}a .fa{display:inline-block;text-decoration:inherit}li .fa{display:inline-block}li .fa-large:before,li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-0.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before,ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before{content:""}.icon-book:before{content:""}.fa-caret-down:before{content:""}.icon-caret-down:before{content:""}.fa-caret-up:before{content:""}.icon-caret-up:before{content:""}.fa-caret-left:before{content:""}.icon-caret-left:before{content:""}.fa-caret-right:before{content:""}.icon-caret-right:before{content:""}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:"Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;z-index:400}.rst-versions a{color:#2980B9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27AE60;*zoom:1}.rst-versions .rst-current-version:before,.rst-versions .rst-current-version:after{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book{float:left}.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#E74C3C;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#F1C40F;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:gray;display:none}.rst-versions .rst-other-versions 
hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:solid 1px #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .icon-book{float:none}.rst-versions.rst-badge .fa-book{float:none}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book{float:left}.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge .rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width: 768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} 2 | -------------------------------------------------------------------------------- /docs/build/html/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* This file intentionally left blank. */ 2 | -------------------------------------------------------------------------------- /docs/build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '1.0', 4 | LANGUAGE: 'None', 5 | COLLAPSE_INDEX: false, 6 | FILE_SUFFIX: '.html', 7 | HAS_SOURCE: true, 8 | SOURCELINK_SUFFIX: '.txt', 9 | NAVIGATION_WITH_KEYS: false 10 | }; -------------------------------------------------------------------------------- /docs/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/file.png -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Inconsolata-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Inconsolata-Bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Inconsolata-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Inconsolata-Regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Inconsolata.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Inconsolata.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato-Bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato-Regular.ttf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato-Regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- 
/docs/build/html/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab-Bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab-Regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot 
-------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- 
/docs/build/html/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/build/html/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/build/html/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/build/html/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | /* sphinx_rtd_theme version 0.4.3 | MIT license */ 2 | /* Built 20190212 16:02 */ 3 | require=function r(s,a,l){function c(e,n){if(!a[e]){if(!s[e]){var i="function"==typeof require&&require;if(!n&&i)return i(e,!0);if(u)return u(e,!0);var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}var o=a[e]={exports:{}};s[e][0].call(o.exports,function(n){return c(s[e][1][n]||n)},o,o.exports,r,s,a,l)}return a[e].exports}for(var u="function"==typeof require&&require,n=0;n"),i("table.docutils.footnote").wrap("
"),i("table.docutils.citation").wrap("
"),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var e=i(this);expand=i(''),expand.on("click",function(n){return t.toggleCurrent(e),n.stopPropagation(),!1}),e.prepend(expand)})},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),i=e.find('[href="'+n+'"]');if(0===i.length){var t=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(i=e.find('[href="#'+t.attr("id")+'"]')).length&&(i=e.find('[href="#"]'))}0this.docHeight||(this.navBar.scrollTop(i),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:e.exports.ThemeNav,StickyNav:e.exports.ThemeNav}),function(){for(var r=0,n=["ms","moz","webkit","o"],e=0;eNUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/source/.DS_Store -------------------------------------------------------------------------------- /docs/source/_static/RTTM_Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/source/_static/RTTM_Logo.png -------------------------------------------------------------------------------- /docs/source/_static/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/docs/source/_static/architecture.png -------------------------------------------------------------------------------- /docs/source/apiKeySetUp.md: -------------------------------------------------------------------------------- 1 | API Key Setup 2 | ============= 3 | 4 | Twitter API Key: 5 | ---------------- 6 | * Go to https://dev.twitter.com/apps/new and log in, if necessary 7 | 8 | * Enter your Application Name, Description and your website address. You can leave the callback URL empty. 9 | 10 | * Accept the TOS, and solve the CAPTCHA. 
11 | 12 | * Submit the form by clicking the Create your Twitter Application button 13 | 14 | * Copy the consumer key (API key) and consumer secret from the screen into your application 15 | 16 | Github API Key: 17 | --------------- 18 | 19 | Refer to https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line for detailed information. 20 | 21 | Pastebin.com 22 | ------------- 23 | For pastebin.com it is not necessary to generate an API key, but you do need to whitelist your IP. This is a paid feature: you have to pay to get your IP whitelisted. 24 | 25 | Refer to https://pastebin.com/doc_scraping_api for detailed information. 26 | -------------------------------------------------------------------------------- /docs/source/architecture.md: -------------------------------------------------------------------------------- 1 | Architecture 2 | ============= 3 | 4 | .. image:: _static/architecture.png 5 | :width: 800 6 | :alt: Alternative text 7 | 8 | The architectural diagram of the tool is shown above. 9 | 10 | How it works 11 | ----------------- 12 | 13 | Once the tool is started, the engine kicks off and runs forever. The main input for the engine is the configuration file. Based on the configuration data, the engine probes twitter/github/reddit for the search terms configured in the configuration file. When a match is found, the link to the matching twitter/github/reddit item is pushed to the sqlite DB and an email alert is triggered. 14 | 15 | For pastie sites the logic is different, because they support neither search nor streaming APIs. Instead, the link of every new pastie made by any user is fetched and pushed to kafka. Each new link added to kafka is then picked up and searched for the terms configured in the configuration file. When a match is found, the link to the pastie is pushed to the sqlite DB and an email alert is triggered. -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'Real Time Threat Monitoring Tool' 21 | copyright = '2019, Naveen Rudrappa' 22 | author = 'Naveen Rudrappa' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '1.0' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | ] 35 | 36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ['_templates'] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = [] 43 | 44 | source_suffix=['.rst','.md'] 45 | 46 | 47 | # -- Options for HTML output ------------------------------------------------- 48 | 49 | # The theme to use for HTML and HTML Help pages. See the documentation for 50 | # a list of builtin themes. 51 | # 52 | html_theme = 'sphinx_rtd_theme' 53 | 54 | # Add any paths that contain custom static files (such as style sheets) here, 55 | # relative to this directory. They are copied after the builtin static files, 56 | # so a file named "default.css" will overwrite the builtin "default.css". 57 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /docs/source/configuration.md: -------------------------------------------------------------------------------- 1 | Configuration 2 | ============= 3 | 4 | Before using this tool it is necessary to understand the properties files present in the scrapper_config directory. 5 | 6 | consumer.properties 7 | ------------------------ 8 | Holds all the necessary config data needed for the Kafka consumer (refer to the Apache Kafka guide for more information). The values present here are default options and do not require any changes. 9 | 10 | producer.properties 11 | ------------------------ 12 | Holds all the necessary config data needed for the Kafka producer (refer to the Apache Kafka guide for more information). The values present here are default options and do not require any changes. 13 | 14 | email.properties 15 | ------------------------ 16 | Holds all the configuration data needed to send email. 17 | 18 | scanner-configuration.properties 19 | ------------------------------------- 20 | This is the core configuration file. Update all the config needed to enable search on twitter/github (to get tokens and keys, refer to the respective sites). 21 | 22 | For pastie sites and reddit there is no need to change the config. 23 | 24 | *Note: In all cases make sure to change "searchterms" to the values you want to search for. If there are multiple search terms, separate them with commas like the example data provided in the config file.* 25 | 26 | **Understanding more about the scanner-configuration.properties file.** 27 | 28 | For any pastie site the configuration is as below: 29 | 30 | *Note: leave the pastie site configuration as is and just change the search terms as required by the organization. That is sufficient. An illustrative entry is shown below.*
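For example, a single pastie entry with these keys filled in might look like the following sketch. The homeurl, regex and downloadurl values mirror the pastebin.com examples that appear in the project source; the pastie name, searchterms and timetosleep values are purely illustrative placeholders, not defaults shipped with the tool:

    scrapper.pastebin.profile=Pastebin
    scrapper.pastebin.homeurl=https://pastebin.com/archive
    scrapper.pastebin.regex=href="/(\w{8})">
    scrapper.pastebin.downloadurl=https://pastebin.com/raw/{id}
    scrapper.pastebin.searchterms=mycompany.com,internal-codename,confidential
    scrapper.pastebin.timetosleep=300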
31 | 32 | * scrapper.(pastie name).profile=(Pastie profile name) 33 | 34 | * scrapper.(pastie name).homeurl=(URL from which pastie ids are extracted) 35 | 36 | * scrapper.(pastie name).regex=(Regex to fetch pastie ids) 37 | 38 | * scrapper.(pastie name).downloadurl=(URL to fetch the content of each pastie) 39 | 40 | * scrapper.(pastie name).searchterms=(Terms to be searched, separated by commas) 41 | 42 | * scrapper.(pastie name).timetosleep=(Time for which the pastie thread will sleep before fetching pastie ids again) 43 | 44 | For github search the configuration is as below: 45 | 46 | * scrapper.github.profile=Github 47 | 48 | * scrapper.github.baseurl=https://api.github.com/search/code?q={searchTerm}&sort=indexed&order=asc 49 | 50 | * scrapper.github.access_token=(Get your own github access token) 51 | 52 | * scrapper.github.searchterms=(Terms to be searched, separated by commas) 53 | 54 | * scrapper.github.timetosleep=(Time for which the github thread should sleep before searching again) 55 | 56 | 57 | For reddit search the configuration is as below: 58 | * scrapper.reddit.profile=Reddit 59 | 60 | * scrapper.reddit.baseurl=https://www.reddit.com/search.json?q={searchterm} 61 | 62 | * scrapper.reddit.searchterms=(Terms to be searched, separated by commas) 63 | 64 | * scrapper.reddit.timetosleep=(Time for which the reddit thread should sleep before searching again) 65 | 66 | 67 | For Twitter search the configuration is as below: 68 | * scrapper.twitter.apikey=test 69 | 70 | * scrapper.twitter.profile=Twitter 71 | 72 | * scrapper.twitter.searchterms=(Terms to be searched, separated by commas) 73 | 74 | * scrapper.twitter.consumerKey=(Get your own consumer key) 75 | 76 | * scrapper.twitter.consumerSecret=(Get your own consumerSecret) 77 | 78 | * scrapper.twitter.accessToken=(Get your own accessToken) 79 | 80 | * scrapper.twitter.accessTokenSecret=(Get your own accessTokenSecret) 81 | -------------------------------------------------------------------------------- /docs/source/contents.rst: -------------------------------------------------------------------------------- 1 | .. Real Time Threat Monitoring Tool documentation master file, created by 2 | sphinx-quickstart on Sun Nov 10 06:40:56 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Real Time Threat Monitoring Tool's documentation! 7 | ============================================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | intro 14 | architecture 15 | configuration 16 | apiKeySetUp 17 | setupGuide 18 | debugging 19 | roadmap 20 | contributors 21 | license 22 | -------------------------------------------------------------------------------- /docs/source/contributors.md: -------------------------------------------------------------------------------- 1 | Contributors 2 | ============ 3 | 4 | Well, let's accept the fact that nothing goes well without contributors. Here is the list of people who have helped (`@rttmscrapper `_) grow in its first phase.
5 | 6 | Author: 7 | ------------------------ 8 | 9 | Folks who took time out of their busy schedules and got their hands dirty with the code: 10 | 11 | * Naveen Rudrappa 12 | 13 | Contributors: 14 | ------------------------ 15 | * Murali Krishna Segu 16 | 17 | * Sunny Sharma 18 | 19 | Mentors: 20 | ------------- 21 | 22 | Chaps who were generous enough to give feedback and suggest changes: 23 | 24 | * Murali Krishna Segu 25 | -------------------------------------------------------------------------------- /docs/source/debugging.md: -------------------------------------------------------------------------------- 1 | Debugging 2 | ========= 3 | 4 | Follow the steps below in case you find issues while working with the tool. 5 | 6 | * Whenever you face an issue with the tool, look at what is displayed in the logs. Sometimes an API key will have expired and you may have to regenerate it. 7 | 8 | * Run *clean.sh* in the script folder. 9 | 10 | * Now run *intialize.sh* 11 | 12 | * Now run *java -jar scraptool/target/scraptool-1.0-SNAPSHOT-standalone.jar -t test -c /home/n0r00ij/RTS/scrapper_config/* -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Real Time Threat Monitoring Tool documentation master file, created by 2 | sphinx-quickstart on Sun Nov 10 06:40:56 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Real Time Threat Monitoring Tool's documentation! 7 | ============================================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | intro 14 | architecture 15 | configuration 16 | apiKeySetUp 17 | setupGuide 18 | debugging 19 | roadmap 20 | contributors 21 | license 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/source/intro.md: -------------------------------------------------------------------------------- 1 | Intro 2 | ===== 3 | 4 | .. image:: _static/RTTM_Logo.png 5 | :scale: 30 % 6 | :align: right 7 | :class: intro-logo 8 | 9 | Why this tool? 10 | ------------------ 11 | Monitoring possible threats to your company on the Internet is an impossible task to achieve manually, so many threats to the company go unnoticed until they become viral in public, causing monetary/reputation damage. This is where RTTM comes into action. RTTM (Real Time Threat Monitoring Tool) is a tool developed to scrape all pasties, github, reddit, etc. in real time to identify occurrences of the configured search terms. Upon a match an email alert is triggered, allowing the company to react to code leaks, hacks being tweeted, etc. and harden itself against an attack before it goes viral. 12 | 13 | Over the past 2 years the tool has evolved beyond simple search. Artificial intelligence has been implemented to perform better, context-based searches. If regex is needed, even that is supported. The behaviour is thus close to that of a human and reduces false positives. 14 | 15 | The best part of the tool is that an email alert is sent in less than 60 seconds from the time a threat makes it to the internet, allowing a response to happen in real time. 16 | 17 | The same tool in a malicious user's hands can be used offensively to get updates on the latest hacks, code leakages, etc.
18 | 19 | The list of sites which will be monitored is: 20 | 21 | Non-Pastie Sites: 22 | ------------------------ 23 | 24 | * Twitter 25 | 26 | * Reddit 27 | 28 | * Github 29 | 30 | 31 | Pastie Sites 32 | ------------------------ 33 | 34 | * Pastebin.com 35 | 36 | * Codepad.org 37 | 38 | * Dumpz.org 39 | 40 | * Snipplr.com 41 | 42 | * Paste.org.ru 43 | 44 | * Gist.github.com 45 | 46 | * Pastebin.ca 47 | 48 | * Kpaste.net 49 | 50 | * Slexy.org 51 | 52 | * Ideone.com 53 | 54 | * Pastebin.fr 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /docs/source/license.md: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. -------------------------------------------------------------------------------- /docs/source/roadmap.md: -------------------------------------------------------------------------------- 1 | RoadMap 2 | ======= 3 | 4 | We have the following ideas to include in upcoming versions: 5 | 6 | * Enhance the artificial intelligence used for search 7 | 8 | * Implement worker nodes for searching 9 | 10 | * Support google dorks, linkedin, fark web, etc. 11 | 12 | * Support other DBs like postgres/oracle/Mongo, etc. 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/source/setupGuide.md: -------------------------------------------------------------------------------- 1 | SetupGuide 2 | ========== 3 | 4 | Install via Docker 5 | --------------------------------------------------- 6 | 7 | * Install docker on your system 8 | 9 | * Download the Dockerfile from https://github.com/NaveenRudra/RTTM 10 | 11 | * Change directory to RTTM 12 | 13 | * Execute *docker build .* 14 | 15 | * Run *docker exec -it <container id> /bin/bash* 16 | 17 | * Now once inside docker, navigate to */opt/RTTM/script* 18 | 19 | * Run the *intialize.sh* script. This will boot the mysql server and start kafka. 20 | 21 | * Run *db_setup.sh*; this will create the needed tables.
22 | 23 | * Now from */opt/RTTM* run command *java -jar scraptool/target/scraptool-1.0-SNAPSHOT-standalone.jar -t test -c /home/n0r00ij/RTS/scrapper_config/* 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /kafka-parser/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /kafka-parser/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /kafka-parser/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | kafka-parser 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /kafka-parser/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/main/resources=UTF-8 4 | encoding//src/test/java=UTF-8 5 | encoding/=UTF-8 6 | -------------------------------------------------------------------------------- /kafka-parser/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /kafka-parser/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 4.0.0 7 | 8 | com.rts.scrap 9 | rts 10 | 1.0-SNAPSHOT 11 | 12 | com.rts.scrap 13 | kafka-parser 14 | 1.0-SNAPSHOT 15 | kafka-parser 16 | http://maven.apache.org 17 | 18 | UTF-8 19 | 20 | 21 | 22 | 23 | net.amygdalum 24 | stringsearchalgorithms 25 | 0.3.4 26 | 27 | 28 | org.freemarker 29 | freemarker 30 | 2.3.20 31 | 32 | 33 | javax.mail 34 | mail 35 | 1.4 36 | 37 | 38 | com.fasterxml.jackson.core 39 | jackson-databind 40 | 2.9.0 41 | 42 | 43 | com.fasterxml.jackson.core 44 | jackson-annotations 45 | 2.9.0 46 | 47 | 48 | com.fasterxml.jackson.core 49 | jackson-core 50 | 2.9.0 51 | 52 | 53 | 54 | org.apache.commons 55 | commons-io 56 | 1.3.2 57 | 58 | 59 | org.apache.commons 60 | commons-lang3 61 | 3.1 62 | 63 | 64 | org.apache.kafka 65 | kafka-clients 66 | 0.9.0.0 67 | 68 | 69 | com.google.guava 70 | guava 71 | 18.0 72 | 73 | 74 | com.rts.scrap 75 | sqlite-dataaccess 76 | 1.0-SNAPSHOT 77 | 78 | 79 | org.hdrhistogram 80 | HdrHistogram 81 | 2.1.8 82 | 83 | 84 | junit 85 | junit 86 | 4.9 87 | test 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/base/NotificationConsumerGroup.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.base; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | public final class 
NotificationConsumerGroup { 9 | private final int numberOfConsumers; 10 | 11 | private final String topic; 12 | private List consumers; 13 | 14 | 15 | public NotificationConsumerGroup(int numberOfConsumers,String topic,File configDirectoryfile) throws IOException { 16 | this.topic=topic; 17 | this.numberOfConsumers = numberOfConsumers; 18 | consumers = new ArrayList<>(); 19 | for (int i = 0; i < this.numberOfConsumers; i++) { 20 | NotificationConsumerThread ncThread = 21 | new NotificationConsumerThread(this.topic,configDirectoryfile); 22 | consumers.add(ncThread); 23 | } 24 | } 25 | 26 | public void execute() { 27 | for (NotificationConsumerThread ncThread : consumers) { 28 | Thread t = new Thread(ncThread); 29 | t.start(); 30 | } 31 | } 32 | 33 | 34 | 35 | public int getNumberOfConsumers() { 36 | return numberOfConsumers; 37 | } 38 | 39 | 40 | 41 | 42 | 43 | } 44 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/base/NotificationConsumerThread.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.base; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.File; 5 | import java.io.IOException; 6 | import java.nio.charset.Charset; 7 | import java.nio.file.Files; 8 | import java.nio.file.Paths; 9 | import java.util.ArrayList; 10 | import java.util.Arrays; 11 | import java.util.List; 12 | import java.util.Properties; 13 | import java.util.Random; 14 | import org.apache.kafka.clients.consumer.ConsumerRecord; 15 | import org.apache.kafka.clients.consumer.ConsumerRecords; 16 | import org.apache.kafka.clients.consumer.KafkaConsumer; 17 | import org.kafkaparser.utilities.ConfigData; 18 | import org.kafkaparser.utilities.PastieParseAndSearch; 19 | import org.kafkaparser.pojo.Data; 20 | 21 | public class NotificationConsumerThread implements Runnable { 22 | 23 | private final KafkaConsumer consumer; 24 | private final String topic; 25 | private static List userAgents = new ArrayList<>(); 26 | 27 | 28 | public static void initialize(String configDirectory) 29 | { 30 | 31 | try { 32 | //userAgents = Files.readAllLines(Paths.get(ConfigData.configDirectory,ConfigData.useragents_listPropertiesFileName), 33 | // Charset.defaultCharset()); 34 | //static block is initilized before initilaizing variables is causing issue. 
Danger comment 35 | ConfigData.userAgents = Files.readAllLines(Paths.get(ConfigData.configDirectory,ConfigData.useragents_listPropertiesFileName), 36 | Charset.defaultCharset()); 37 | Properties prop = getConfig(new File(configDirectory),"global.properties"); 38 | ConfigData.pythonPath = prop.getProperty("pythonpath"); 39 | ConfigData.trufflehogPath = prop.getProperty("trufflehogpath"); 40 | //ConfigData.pythonPath = prop.getProperty(""); 41 | 42 | } catch (IOException e) { 43 | // TODO Auto-generated catch block 44 | e.printStackTrace(); 45 | } 46 | 47 | 48 | } 49 | 50 | public NotificationConsumerThread(String topic,File configDirectoryfile) throws IOException { 51 | Properties prop = getConfig(configDirectoryfile,"consumer.properties"); 52 | this.consumer = new KafkaConsumer<>(prop); 53 | this.topic = topic; 54 | this.consumer.subscribe(Arrays.asList(this.topic)); 55 | 56 | initialize(configDirectoryfile.getAbsolutePath()); 57 | 58 | 59 | 60 | } 61 | 62 | private static Properties getConfig(File configDirectoryfile,String propFileName) throws IOException { 63 | Properties properties = new Properties(); 64 | properties.load(new ByteArrayInputStream(Files.readAllBytes(new File(configDirectoryfile, propFileName).toPath()))); 65 | if (properties.getProperty("group.id") == null) { 66 | properties.setProperty("group.id", "group-" + new Random().nextInt(100000)); 67 | } 68 | return properties; 69 | } 70 | 71 | @Override 72 | public void run() { 73 | try { 74 | while (true) 75 | { 76 | ConsumerRecords records = consumer.poll(100); 77 | for (ConsumerRecord record : records) 78 | { 79 | /**System.out.println("Receive message: " + record.value() + ", Partition: " 80 | + record.partition() + ", Offset: " + record.offset() + ", by ThreadID: " 81 | + Thread.currentThread().getId());**/ 82 | //System.out.println("Message recieved !!*********************************************************"+record.value().getUrl()); 83 | //Make sure to enable below code for sending resposne and confirming if chanegs has been done or not 84 | Thread.sleep(200); 85 | PastieParseAndSearch.searchEachid(record.value()); 86 | } 87 | } 88 | } 89 | catch (Exception e) { 90 | e.printStackTrace(); 91 | } 92 | } 93 | 94 | 95 | 96 | } 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/base/Producer.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.base; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.File; 5 | import java.io.IOException; 6 | import java.nio.file.Files; 7 | import java.util.Properties; 8 | import org.apache.kafka.clients.producer.Callback; 9 | import org.apache.kafka.clients.producer.KafkaProducer; 10 | import org.apache.kafka.clients.producer.ProducerRecord; 11 | import org.apache.kafka.clients.producer.RecordMetadata; 12 | import org.kafkaparser.pojo.Data; 13 | 14 | public class Producer { 15 | 16 | private static KafkaProducer producer; 17 | public static void initialize(File configDirectory) throws IOException 18 | { 19 | 20 | Properties properties = new Properties(); 21 | properties.load(new ByteArrayInputStream(Files.readAllBytes(new File(configDirectory, "producer.properties").toPath()))); 22 | producer = new KafkaProducer<>(properties); 23 | } 24 | 25 | public static void send(Data data,String topic) 26 | { 27 | //do parsing of the urls from regex here 28 | try { 29 | 30 | producer.send(new ProducerRecord(topic, data), new Callback() 
{ 31 | public void onCompletion(RecordMetadata metadata, Exception e) { 32 | if (e != null) { 33 | e.printStackTrace(); 34 | } 35 | // System.out.println("Sent: Partition: " + metadata.partition() + ", Offset: " 36 | // + metadata.offset()); 37 | } 38 | }); 39 | } catch (Exception e) { 40 | e.printStackTrace(); 41 | } 42 | 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/deseralize/DataDeserializer.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.deseralize; 2 | 3 | import java.util.Map; 4 | 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.kafkaparser.pojo.Data; 7 | 8 | @SuppressWarnings("unchecked") 9 | public class DataDeserializer implements org.apache.kafka.common.serialization.Deserializer{ 10 | 11 | @Override 12 | public T deserialize(String var1, byte[] arg1) { 13 | ObjectMapper mapper = new ObjectMapper(); 14 | Data data=null; 15 | 16 | try { 17 | data = mapper.readValue(arg1, Data.class); 18 | } catch (Exception e) { 19 | 20 | e.printStackTrace(); 21 | } 22 | return (T) data; 23 | } 24 | 25 | @Override 26 | public void close() { 27 | // TODO Auto-generated method stub 28 | 29 | } 30 | 31 | @Override 32 | public void configure(@SuppressWarnings("rawtypes") Map arg0, boolean arg1) { 33 | // TODO Auto-generated method stub 34 | 35 | } 36 | 37 | 38 | } 39 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/pojo/Data.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.pojo; 2 | 3 | import java.util.ArrayList; 4 | 5 | import com.fasterxml.jackson.annotation.JsonAutoDetect; 6 | import com.fasterxml.jackson.annotation.JsonCreator; 7 | import com.fasterxml.jackson.annotation.JsonProperty; 8 | 9 | @JsonAutoDetect 10 | public class Data { 11 | 12 | private ArrayList searchTerms; 13 | private String url; 14 | 15 | private String botName="RTS"; 16 | private String trufflehogregex="true"; 17 | private String trufflehogentropy="false"; 18 | 19 | 20 | public String getTrufflehogregex() { 21 | return trufflehogregex; 22 | } 23 | public void setTrufflehogregex(String trufflehogregex) { 24 | this.trufflehogregex = trufflehogregex; 25 | } 26 | public String getTrufflehogentropy() { 27 | return trufflehogentropy; 28 | } 29 | public void setTrufflehogentropy(String trufflehogentropy) { 30 | this.trufflehogentropy = trufflehogentropy; 31 | } 32 | 33 | public String getBotName() { 34 | return botName; 35 | } 36 | public void setBotName(String botName) { 37 | this.botName = botName; 38 | } 39 | public ArrayList getSearchTerms() { 40 | return searchTerms; 41 | } 42 | public void setSearchTerms(ArrayList searchTerms) { 43 | this.searchTerms = searchTerms; 44 | } 45 | 46 | @JsonCreator 47 | public Data(@JsonProperty("url")String url,@JsonProperty("searchTerms")ArrayList searchTerms,@JsonProperty("botName") String botName, 48 | @JsonProperty("trufflehogregex") String trufflehogregex,@JsonProperty("trufflehogentropy") String trufflehogentropy) 49 | { 50 | this.url=url; 51 | this.searchTerms=searchTerms; 52 | this.botName = botName; 53 | this.trufflehogentropy=trufflehogentropy; 54 | this.trufflehogregex=trufflehogregex; 55 | 56 | } 57 | public String getUrl() { 58 | return url; 59 | } 60 | public void setUrl(String url) { 61 | this.url = url; 62 | } 63 | 64 | 65 | 66 | 67 | 68 | @Override public 
String toString() 69 | { 70 | return "data is "+url+"cool man"; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/serialize/DataSerializer.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.serialize; 2 | 3 | import java.util.Map; 4 | 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | 7 | public class DataSerializer implements org.apache.kafka.common.serialization.Serializer{ 8 | 9 | @Override 10 | public void close() { 11 | // TODO Auto-generated method stub 12 | 13 | } 14 | 15 | @Override 16 | public void configure(@SuppressWarnings("rawtypes") Map arg0, boolean arg1) { 17 | // TODO Auto-generated method stub 18 | 19 | } 20 | 21 | @Override 22 | public byte[] serialize(String arg0, Object arg1) { 23 | byte[] retVal = null; 24 | ObjectMapper objectMapper = new ObjectMapper(); 25 | try { 26 | retVal = objectMapper.writeValueAsString(arg1).getBytes(); 27 | } catch (Exception e) { 28 | e.printStackTrace(); 29 | } 30 | return retVal; 31 | } 32 | 33 | 34 | 35 | } 36 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/utilities/ConfigData.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.utilities; 2 | 3 | import java.util.List; 4 | 5 | public class ConfigData { 6 | 7 | public static String consumerPropertiesFileName = "consumer.properties"; 8 | public static String emailPropertiesFileName = "email.properties"; 9 | public static String producerPropertiesFileName = "producer.properties"; 10 | public static String proxyPropertiesFileName = "proxy.properties"; 11 | public static String scanner_configurationPropertiesFileName = "scanner-configuration.properties"; 12 | public static String useragents_listPropertiesFileName = "useragents-list.txt"; 13 | public static String configDirectory =null; 14 | public static String topicName=null; 15 | public static String pythonPath=""; 16 | public static String trufflehogPath=""; 17 | public static List userAgents; 18 | } 19 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/utilities/ConfigParams.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.utilities; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.util.Properties; 6 | 7 | import com.google.common.io.Resources; 8 | 9 | public class ConfigParams { 10 | 11 | public static String proxy_ip; 12 | public static String proxy_port; 13 | 14 | public static void initialzie() throws IOException 15 | { 16 | /** try (InputStream props = Resources.getResource("config.props").openStream()) { 17 | Properties properties = new Properties(); 18 | properties.load(props); 19 | proxy_ip=properties.getProperty("proxy_ip"); 20 | proxy_port=properties.getProperty("proxy_port"); 21 | System.setProperty("http.proxySet", "true"); 22 | System.setProperty("http.proxyHost",proxy_ip) ; 23 | System.setProperty("http.proxyPort", proxy_port) ; 24 | System.setProperty("https.proxySet", "true"); 25 | System.setProperty("https.proxyHost",proxy_ip) ; 26 | System.setProperty("https.proxyPort", proxy_port) ; 27 | }**/ 28 | } 29 | 30 | 31 | 32 | } 33 | -------------------------------------------------------------------------------- 
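The classes above (the Data POJO, its Kafka serializer/deserializer, Producer and the consumer group) implement the pastie pipeline described on the architecture page: a scraper publishes each new pastie link to Kafka, and a consumer group pulls the links back out and searches them. The following is a minimal, illustrative sketch of how these pieces fit together, not code from the repository; the topic name, search term and pastie URL are placeholder assumptions, and the config path simply mirrors the one used in the setup guide.

import java.io.File;
import java.util.ArrayList;

import org.kafkaparser.base.NotificationConsumerGroup;
import org.kafkaparser.base.Producer;
import org.kafkaparser.pojo.Data;

public class PipelineSketch {

    public static void main(String[] args) throws Exception {
        // Directory holding consumer.properties / producer.properties
        // (path taken from the setup guide; adjust for your install).
        File configDir = new File("/home/n0r00ij/RTS/scrapper_config/");

        // Producer side: publish a pastie link together with the terms to look for.
        Producer.initialize(configDir);
        ArrayList<String> terms = new ArrayList<>();
        terms.add("mycompany.com"); // illustrative search term
        Data data = new Data("https://pastebin.com/raw/abcd1234", // illustrative pastie link
                terms, "RTS", "true", "false");
        Producer.send(data, "pastie-links"); // topic name is an assumption

        // Consumer side: a small consumer group that searches each link it receives
        // and, per the architecture doc, stores matches in sqlite and sends an email alert.
        NotificationConsumerGroup group = new NotificationConsumerGroup(3, "pastie-links", configDir);
        group.execute();
    }
}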
/kafka-parser/src/main/java/org/kafkaparser/utilities/DbUtil.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.utilities; 2 | 3 | import java.text.DateFormat; 4 | import java.text.SimpleDateFormat; 5 | import java.util.Date; 6 | import java.util.Set; 7 | 8 | import javax.persistence.TypedQuery; 9 | 10 | import org.kafkaparser.pojo.Data; 11 | import org.sqlite.dataaccess.entity.Result; 12 | import org.sqlite.dataaccess.entity.SearchItem; 13 | import org.sqlite.dataaccess.util.DaoUtil; 14 | import org.sqlite.dataaccess.util.EMfactory; 15 | 16 | public class DbUtil { 17 | 18 | private static DateFormat df = new SimpleDateFormat("dd/MM/yy HH:mm:ss"); 19 | private static Date dateobj = new Date(); 20 | private static String FUTURE_IMPLEMENTATION = "This is for future implementation"; 21 | 22 | private static final SearchItem getSearchItem(final SearchItem searchItem) { 23 | try { 24 | final String query = "SELECT search_item FROM SearchItem search_item where search_item.searchItem=:searchItem"; 25 | final TypedQuery typedQuery = EMfactory.em.createQuery(query, SearchItem.class); 26 | typedQuery.setParameter("searchItem", searchItem.getSearchItem()); 27 | return typedQuery.getResultList().stream().findFirst().orElse(null); 28 | } catch (Exception e) { 29 | e.printStackTrace(); 30 | } 31 | return null; 32 | } 33 | 34 | public static void addNewEntry(Set termsFound, Data data) { 35 | 36 | final Result result = new Result(); 37 | result.setSearchedTerms(termsFound); 38 | result.setSearchedtext(FUTURE_IMPLEMENTATION); 39 | result.setBotName(data.getBotName()); 40 | result.setUrl(data.getUrl()); 41 | result.setTime(df.format(dateobj).toString()); 42 | SearchItem item = null; 43 | for (SearchItem searchItem : termsFound) { 44 | item = getSearchItem(searchItem); 45 | if(item != null) { 46 | searchItem.setId(item.getId()); 47 | } 48 | searchItem.addResult(result); 49 | } 50 | if(item != null) { 51 | DaoUtil.merge(result); 52 | } else { 53 | DaoUtil.insert(result); 54 | } 55 | 56 | } 57 | 58 | public static void addNewEntry(Set termsFound, String url) { 59 | 60 | final Result result = new Result(); 61 | result.setSearchedTerms(termsFound); 62 | result.setSearchedtext(FUTURE_IMPLEMENTATION); 63 | result.setBotName("Future"); 64 | result.setUrl(url); 65 | result.setTime(df.format(dateobj).toString()); 66 | for (SearchItem searchItem : termsFound) { 67 | final SearchItem item = getSearchItem(searchItem); 68 | searchItem.setId(item.getId()); 69 | searchItem.addResult(result); 70 | } 71 | DaoUtil.insert(result); 72 | } 73 | 74 | public static void addNewEntry(Set termsFound, String url, String botName) { 75 | 76 | final Result result = new Result(); 77 | result.setSearchedTerms(termsFound); 78 | result.setSearchedtext(FUTURE_IMPLEMENTATION); 79 | result.setBotName(botName); 80 | result.setUrl(url); 81 | result.setTime(df.format(dateobj).toString()); 82 | for (SearchItem searchItem : termsFound) { 83 | final SearchItem item = getSearchItem(searchItem); 84 | searchItem.setId(item.getId()); 85 | searchItem.addResult(result); 86 | } 87 | DaoUtil.insert(result); 88 | } 89 | 90 | public static void addNewEntry(Set termsFound, String url, String botName, Boolean isValid) { 91 | final Result result = new Result(); 92 | result.setSearchedTerms(termsFound); 93 | result.setSearchedtext(FUTURE_IMPLEMENTATION); 94 | result.setBotName(botName); 95 | result.setUrl(url); 96 | result.setTime(df.format(dateobj).toString()); 97 | result.setIsValid(isValid); 98 | 
for (SearchItem searchItem : termsFound) { 99 | final SearchItem item = getSearchItem(searchItem); 100 | searchItem.setId(item.getId()); 101 | searchItem.addResult(result); 102 | } 103 | DaoUtil.insert(result); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/utilities/Git.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.utilities; 2 | 3 | 4 | import java.io.BufferedReader; 5 | import java.io.IOException; 6 | import java.io.InputStream; 7 | import java.io.InputStreamReader; 8 | import java.nio.file.Files; 9 | import java.nio.file.Path; 10 | import java.nio.file.Paths; 11 | import java.util.Objects; 12 | 13 | public class Git { 14 | 15 | // example of usage 16 | private static void initAndAddFile() throws IOException, InterruptedException { 17 | Path directory = Paths.get("c:\\temp\\example"); 18 | Files.createDirectories(directory); 19 | gitInit(directory); 20 | Files.write(directory.resolve("example.txt"), new byte[0]); 21 | gitStage(directory); 22 | gitCommit(directory, "Add example.txt"); 23 | } 24 | 25 | // example of usage 26 | private static void cloneAndAddFile() throws IOException, InterruptedException { 27 | String originUrl = "https://github.com/Crydust/TokenReplacer.git"; 28 | Path directory = Paths.get("c:\\temp\\TokenReplacer"); 29 | gitClone(directory, originUrl); 30 | Files.write(directory.resolve("example.txt"), new byte[0]); 31 | gitStage(directory); 32 | gitCommit(directory, "Add example.txt"); 33 | gitPush(directory); 34 | } 35 | 36 | public static void gitInit(Path directory) throws IOException, InterruptedException { 37 | runCommand(directory, "git", "init"); 38 | } 39 | 40 | public static void gitStage(Path directory) throws IOException, InterruptedException { 41 | runCommand(directory, "git", "add", "-A"); 42 | } 43 | 44 | public static void gitCommit(Path directory, String message) throws IOException, InterruptedException { 45 | runCommand(directory, "git", "commit", "-m", message); 46 | } 47 | 48 | public static void gitPush(Path directory) throws IOException, InterruptedException { 49 | runCommand(directory, "git", "push"); 50 | } 51 | 52 | public static void gitClone(Path directory, String originUrl) throws IOException, InterruptedException { 53 | runCommand(directory.getParent(), "git", "clone", originUrl, directory.getFileName().toString()); 54 | } 55 | 56 | public static void runCommand(Path directory, String... 
command) throws IOException, InterruptedException { 57 | Objects.requireNonNull(directory, "directory"); 58 | if (!Files.exists(directory)) { 59 | throw new RuntimeException("can't run command in non-existing directory '" + directory + "'"); 60 | } 61 | 62 | ProcessBuilder pb = new ProcessBuilder() 63 | .command(command) 64 | .directory(directory.toFile()); 65 | Process p = pb.start(); 66 | StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(), "ERROR"); 67 | StreamGobbler outputGobbler = new StreamGobbler(p.getInputStream(), "OUTPUT"); 68 | outputGobbler.start(); 69 | errorGobbler.start(); 70 | int exit = p.waitFor(); 71 | errorGobbler.join(); 72 | outputGobbler.join(); 73 | p.destroy(); 74 | if (exit != 0) { 75 | throw new AssertionError(String.format("runCommand returned %d", exit)); 76 | } 77 | 78 | } 79 | 80 | private static class StreamGobbler extends Thread { 81 | 82 | private final InputStream is; 83 | private final String type; 84 | 85 | private StreamGobbler(InputStream is, String type) { 86 | this.is = is; 87 | this.type = type; 88 | } 89 | 90 | @Override 91 | public void run() { 92 | try (BufferedReader br = new BufferedReader(new InputStreamReader(is));) { 93 | String line; 94 | while ((line = br.readLine()) != null) { 95 | //System.out.println(type + "> " + line); 96 | } 97 | } catch (IOException ioe) { 98 | ioe.printStackTrace(); 99 | } 100 | } 101 | } 102 | 103 | } 104 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/utilities/HttpUtilities.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.utilities; 2 | 3 | import java.util.List; 4 | import java.io.BufferedReader; 5 | import java.io.File; 6 | import java.io.FileNotFoundException; 7 | import java.io.IOException; 8 | import java.io.InputStreamReader; 9 | import java.net.HttpURLConnection; 10 | import java.net.URISyntaxException; 11 | import java.net.URL; 12 | import java.nio.charset.Charset; 13 | import java.nio.file.Files; 14 | import java.nio.file.Paths; 15 | import java.util.ArrayList; 16 | import java.util.Random; 17 | import java.util.Scanner; 18 | 19 | import com.google.common.io.Resources; 20 | 21 | 22 | 23 | 24 | public class HttpUtilities { 25 | 26 | 27 | 28 | 29 | private static Random rand = new Random(); 30 | 31 | 32 | 33 | 34 | public static void main(String [] args) throws IOException, InterruptedException 35 | { 36 | //ConfigParams.initialzie(); 37 | HttpUtilities test=new HttpUtilities(); 38 | System.out.println(test.sendGet("https://pastebin.com/archive")); 39 | //Search.extractRegexMatches(test.sendGet("https://pastebin.com/archive"), "href=\"/(\\w{8})\">"); 40 | //HttpUtilities.parse("href=\"/(\\w{8})\">", test.sendGet("https://pastebin.com/archive")); 41 | 42 | } 43 | 44 | 45 | 46 | public static String sendGet(String url) throws InterruptedException 47 | { 48 | 49 | 50 | 51 | StringBuffer response =null; 52 | int numberofattempts=0; 53 | boolean recievedResponse=false; 54 | 55 | while(numberofattempts<5 && !recievedResponse) 56 | { 57 | 58 | try { 59 | URL obj = new URL(url); 60 | HttpURLConnection con = (HttpURLConnection) obj.openConnection(); 61 | con.setRequestMethod("GET"); 62 | con.setRequestProperty("User-Agent", ConfigData.userAgents.get(rand.nextInt(ConfigData.userAgents.size()))); 63 | //con.setRequestProperty("User-Agent", User_Agent); 64 | BufferedReader in = new BufferedReader( 65 | new InputStreamReader(con.getInputStream())); 66 | String 
inputLine; 67 | response = new StringBuffer(); 68 | 69 | while ((inputLine = in.readLine()) != null) { 70 | response.append(inputLine); 71 | } 72 | in.close(); 73 | con.disconnect(); 74 | recievedResponse=true; 75 | } catch (Exception e) { 76 | // TODO Auto-generated catch block 77 | //e.printStackTrace(); 78 | numberofattempts+=1; 79 | if(numberofattempts==5) 80 | { 81 | System.out.println("Tried "+numberofattempts+" times and could not fetch data for :"+url); 82 | } 83 | Thread.sleep(10000); 84 | } 85 | } 86 | //print result 87 | if(response!=null) 88 | { 89 | return response.toString(); 90 | } 91 | else 92 | { 93 | return "Failed to fetch response for url :"+url; 94 | } 95 | } 96 | 97 | } 98 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/utilities/PastieParseAndSearch.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.utilities; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | 6 | import org.kafkaparser.pojo.Data; 7 | 8 | public class PastieParseAndSearch { 9 | 10 | static 11 | { 12 | try { 13 | ConfigParams.initialzie(); 14 | } catch (IOException e) { 15 | // TODO Auto-generated catch block 16 | e.printStackTrace(); 17 | } 18 | } 19 | 20 | public static ArrayList fetchids(String archiveUrl,String regex) throws IOException, InterruptedException 21 | { 22 | //ConfigParams.initialzie(); 23 | //System.out.println(archiveUrl); 24 | //System.out.println(HttpUtilities.sendGet(archiveUrl)); 25 | ArrayList ids= Search.extractRegexMatches(HttpUtilities.sendGet(archiveUrl), regex); 26 | /**for (String each:ids) 27 | { 28 | System.out.println(each); 29 | }**/ 30 | return ids; 31 | } 32 | 33 | public static void searchEachid(Data data) throws IOException 34 | { 35 | Search.find(data); 36 | /** 37 | if(termsFound.size()<=0) 38 | return false; 39 | else 40 | return true; 41 | */ 42 | } 43 | 44 | public static void main (String [] args) throws InterruptedException 45 | { 46 | ArrayList ids; 47 | try { 48 | //ids = PastieParseAndSearch.fetchids("https://pastebin.com/archive", "href=\"/(\\w{8})\">"); 49 | ids = PastieParseAndSearch.fetchids("https://slexy.org/recent", "View paste"); 50 | 51 | 52 | ArrayList list = new ArrayList(); 53 | list.add("method434"); 54 | for(String id:ids) 55 | { 56 | System.out.println("Testing currently"+id); 57 | // Data temp=PastieParseAndSearch.searchEachid(new Data("https://pastebin.com/raw/{id}".replace("{id}", id),list)); 58 | // if(temp!=null) 59 | // { 60 | // System.out.print("found in "+temp.getUrl()); 61 | // } 62 | // else 63 | // { 64 | // System.out.println("notfound"); 65 | // } 66 | } 67 | } catch (IOException e) { 68 | // TODO Auto-generated catch block 69 | e.printStackTrace(); 70 | } 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/utilities/Search.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.utilities; 2 | 3 | import java.io.IOException; 4 | import java.text.DateFormat; 5 | import java.text.SimpleDateFormat; 6 | import java.util.ArrayList; 7 | import java.util.Date; 8 | import java.util.regex.Pattern; 9 | import org.kafkaparser.pojo.Data; 10 | 11 | import java.util.regex.Matcher; 12 | 13 | public class Search 14 | { 15 | 16 | private static DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); 17 | private 
static Date date = new Date(); 18 | private static DateFormat df = new SimpleDateFormat("dd/MM/yy HH:mm:ss"); 19 | private static Date dateobj = new Date(); 20 | 21 | 22 | public static void find(Data data) throws IOException 23 | { 24 | //System.out.println(data.getUrl()); 25 | SearchThread searchThread=new SearchThread(); 26 | searchThread.initialize(data); 27 | new Thread(searchThread).start(); 28 | /** 29 | String response=HttpUtilities.sendGet(data.getUrl()); 30 | ArrayList termsFound = new ArrayList(); 31 | for(String s:data.getSearchTerms()) 32 | { 33 | if(response.contains(s)) 34 | { 35 | termsFound.add(s); 36 | } 37 | } 38 | if(termsFound.size()>0) 39 | { 40 | //check if multiple threads are resulting in reading same data again and again over ok 41 | System.out.println(df.format(dateobj)+"found in **************************************************"+data.getUrl()+" data found is "+termsFound.get(0));**/ 42 | 43 | /**if(!DaoUtil.searchDuplicateByUrl(data.getUrl())) 44 | { 45 | EmailUtility.sendEmailUsingGmail("Later", data.getUrl(), termsFound); 46 | Result result = new Result(); 47 | result.setSearchedTerms(termsFound); 48 | result.setSearchedtext("This is for future implementation"); 49 | result.setUrl(data.getUrl()); 50 | result.setTime(dateFormat.format(date).toString()); 51 | DaoUtil.insert(result); 52 | } 53 | }**/ 54 | //return termsFound; 55 | } 56 | 57 | public static ArrayList extractRegexMatches(String response,String regex) 58 | { 59 | ArrayList matches= new ArrayList(); 60 | Pattern pattern = Pattern.compile(regex,Pattern.MULTILINE); 61 | Matcher matcher = pattern.matcher(response); 62 | while (matcher.find()) { 63 | matches.add(matcher.group(1)); 64 | } 65 | 66 | return matches; 67 | } 68 | 69 | 70 | 71 | } 72 | -------------------------------------------------------------------------------- /kafka-parser/src/main/java/org/kafkaparser/utilities/TruffleHog.java: -------------------------------------------------------------------------------- 1 | package org.kafkaparser.utilities; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | import java.util.HashSet; 7 | import java.util.Set; 8 | import java.util.concurrent.TimeUnit; 9 | import java.util.regex.Matcher; 10 | import java.util.regex.Pattern; 11 | 12 | import org.sqlite.dataaccess.entity.SearchItem; 13 | import org.sqlite.dataaccess.util.DaoUtil; 14 | 15 | public class TruffleHog implements Runnable { 16 | 17 | private static String regexForSecret = "stringsFound\": (.*)}"; 18 | 19 | private String pastielink; 20 | private String searchTerm; 21 | private String profile; 22 | private String regex; 23 | private String entropy; 24 | private String botName; 25 | private String filePath; 26 | 27 | public void initilaize(String filePath, String pastielink, String searchTerm, String profile, String regex, 28 | String entropy) { 29 | this.filePath = filePath; 30 | this.pastielink = pastielink; 31 | this.searchTerm = searchTerm; 32 | this.profile = profile; 33 | this.regex = regex; 34 | this.entropy = entropy; 35 | 36 | } 37 | 38 | public Set getSecrets() throws IOException, InterruptedException { 39 | final Set secretSet = new HashSet(); 40 | 41 | if (regex.equals("false")) { 42 | regex = ""; 43 | } else { 44 | regex = "--regex"; 45 | } 46 | String[] cmd = { 47 | // "/usr/local/bin/python2.7", 48 | // "/usr/bin/python2.7", 49 | // "/Users/n0r00ij/Downloads/truffleHog-dev/truffleHog/truffleHog/truffleHog.py", 50 | // ConfigData.pythonPath, 51 | // 
ConfigData.trufflehogPath, 52 | "trufflehog", regex, "--cleanup", "--entropy=" + entropy, "--json", "file://" + filePath }; 53 | 54 | Process p = Runtime.getRuntime().exec(cmd); 55 | // p.waitFor(); 56 | BufferedReader bri = new BufferedReader(new InputStreamReader(p.getInputStream())); 57 | BufferedReader bre = new BufferedReader(new InputStreamReader(p.getErrorStream())); 58 | String line; 59 | while ((line = bri.readLine()) != null) { 60 | // System.out.println(line); 61 | // System.out.println(); 62 | secretSet.addAll(extractRegexMatches(line, regexForSecret)); 63 | // System.out.println(line); 64 | 65 | } 66 | bri.close(); 67 | while ((line = bre.readLine()) != null) { 68 | // System.out.println(line); 69 | secretSet.addAll(extractRegexMatches(line, regexForSecret)); 70 | // System.out.println(line); 71 | 72 | } 73 | bre.close(); 74 | p.waitFor(5, TimeUnit.MINUTES); 75 | 76 | p.destroyForcibly(); 77 | 78 | // p.destroy(); 79 | 80 | Boolean is_Valid = false; 81 | if (secretSet.size() > 0) { 82 | System.out.println("Issues have been found ************* Sending email"); 83 | Set temp = new HashSet(); 84 | temp.add(pastielink); 85 | EmailUtility.sendEmailUsingGmail(profile, temp, searchTerm); 86 | is_Valid = true; 87 | } 88 | 89 | /** 90 | * if(regex.toLowerCase().equals("false") && 91 | * this.entropy.toLowerCase().equals("false")) { 92 | * 93 | * } 94 | **/ 95 | 96 | if (!DaoUtil.searchDuplicateByUrl(pastielink)) { 97 | DbUtil.addNewEntry(secretSet, pastielink, profile, is_Valid); 98 | 99 | } 100 | return secretSet; 101 | } 102 | 103 | public static Set extractRegexMatches(String line, String regex) { 104 | final Set matchSet = new HashSet(); 105 | Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE); 106 | Matcher matcher = pattern.matcher(line); 107 | while (matcher.find()) { 108 | final SearchItem searchItem = new SearchItem(); 109 | searchItem.setSearchItem(matcher.group(1)); 110 | matchSet.add(searchItem); 111 | } 112 | return matchSet; 113 | } 114 | 115 | @Override 116 | public void run() { 117 | // TODO Auto-generated method stub 118 | try { 119 | getSecrets(); 120 | } catch (IOException | InterruptedException e) { 121 | // TODO Auto-generated catch block 122 | e.printStackTrace(); 123 | } 124 | 125 | } 126 | 127 | /** 128 | * public static void main(String args[]) { try { 129 | * System.out.println(getSecrets("https://github.com/cogdog/tweets.git")); } 130 | * catch (IOException e) { // TODO Auto-generated catch block 131 | * e.printStackTrace(); } catch (InterruptedException e) { // TODO 132 | * Auto-generated catch block e.printStackTrace(); } } 133 | **/ 134 | 135 | } 136 | -------------------------------------------------------------------------------- /kafka-parser/src/test/java/org/kafka/parser/AppTest.java: -------------------------------------------------------------------------------- 1 | package org.kafka.parser; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 
9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | com.rts.scrap 6 | rts 7 | 1.0-SNAPSHOT 8 | pom 9 | 10 | 11 | sqlite-dataaccess 12 | kafka-parser 13 | rts-base 14 | rts-impl 15 | scraptool 16 | 17 | 18 | 19 | 20 | 21 | junit 22 | junit 23 | 4.12 24 | test 25 | 26 | 27 | 28 | 29 | 30 | 1.8 31 | 1.8 32 | UTF-8 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /rts-base/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /rts-base/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /rts-base/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | rts-base 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /rts-base/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/test/java=UTF-8 4 | encoding/=UTF-8 5 | -------------------------------------------------------------------------------- /rts-base/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /rts-base/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.rts.scrap 7 | rts 8 | 1.0-SNAPSHOT 9 | 10 | com.rts.scrap 11 | rts-base 12 | 1.0-SNAPSHOT 13 | rts-base 14 | http://maven.apache.org 15 | 16 | UTF-8 17 | 18 | 19 | 20 | junit 21 | junit 22 | 3.8.1 23 | test 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /rts-base/src/main/java/org/rts/base/Scrapper.java: -------------------------------------------------------------------------------- 1 | package org.rts.base; 2 | 3 | import java.util.Properties; 4 | 5 | /** 6 | Scrapper is a entity which is to scrap a given pastie,twitter,github etc.. 7 | All scrappers have to implement this interface. 
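Concrete scrappers are created through a matching ScrapperProfile, which ScrapperProfileRegistry discovers at runtime via java.util.ServiceLoader (see META-INF/services/org.rts.base.ScrapperProfile).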
Implementing it is mandatory so that scrapper classes can be loaded dynamically. 8 | If someone needs to extend this framework, it can be done easily by implementing this interface together with the ScrapperProfile interface. 9 | **/ 10 | public interface Scrapper extends Runnable { 11 | 12 | //This function is used to initialize the scrapper 13 | 14 | public void initScrapper(Properties prop); 15 | 16 | 17 | //This function does the actual work of interacting with Kafka 18 | public void kickOffActualWork(); 19 | 20 | //This function is used to stop the scrapper from running 21 | public boolean stopScrapper(); 22 | 23 | //This function is used to restart the scrapper 24 | public boolean restart(); 25 | 26 | } 27 | -------------------------------------------------------------------------------- /rts-base/src/main/java/org/rts/base/ScrapperImpl.java: -------------------------------------------------------------------------------- 1 | package org.rts.base; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.File; 5 | import java.io.IOException; 6 | import java.nio.file.Files; 7 | import java.util.HashMap; 8 | import java.util.Map; 9 | import java.util.Properties; 10 | 11 | import org.rts.base.profileregistry.ScrapperProfileRegistry; 12 | import org.rts.base.utilities.PropertyUtilities; 13 | 14 | public class ScrapperImpl { 15 | 16 | public boolean initlialized; 17 | private Map<String, Scrapper> scrapperMap = new HashMap<String, Scrapper>(); 18 | private static ScrapperImpl instance; 19 | private Properties properties = new Properties(); 20 | 21 | public Map<String, Scrapper> getScrapperMap() 22 | { 23 | return scrapperMap; 24 | } 25 | 26 | public static synchronized ScrapperImpl getInstance() { 27 | if (instance == null) { 28 | instance = new ScrapperImpl(); 29 | } 30 | return instance; 31 | } 32 | 33 | public synchronized void initialize (File configDirectory) throws IOException 34 | { 35 | Properties prop = new Properties(); 36 | prop.setProperty("configDirectory", configDirectory.getAbsolutePath()); 37 | prop.load(new ByteArrayInputStream(Files.readAllBytes(new File(configDirectory, "scanner-configuration.properties").toPath()))); 38 | initialize(prop); 39 | 40 | } 41 | 42 | public synchronized void initialize (Properties prop) throws IOException 43 | { 44 | properties = new Properties(); 45 | properties.putAll(prop); 46 | Map<String, Properties> scannerPropertiesMap = PropertyUtilities.propertiesGroupByFirstDot(PropertyUtilities.filterAndShiftByFirstDot(properties, "scrapper")); 47 | try{ 48 | for(Map.Entry<String, Properties> entry:scannerPropertiesMap.entrySet()) 49 | { 50 | String profile = entry.getValue().getProperty("profile"); 51 | scrapperMap.put(profile, ScrapperProfileRegistry.newScrapProfile(profile, entry.getValue())); 52 | } 53 | } 54 | 55 | finally 56 | { 57 | // This is yet to be implemented; to be decided later 58 | } 59 | initlialized=true; 60 | 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /rts-base/src/main/java/org/rts/base/ScrapperProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.base; 2 | 3 | import java.util.Properties; 4 | 5 | /** 6 | * @author rudrapna 7 | *ScrapperProfile interface is implemented by the profile classes of Scrappers, which return a new instance of a Scrapper. All Scrappers are loaded dynamically *using the class loader. 
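For illustration only (ExampleProfile is a hypothetical name, not part of the framework), a new scrapper is wired in by pairing a Scrapper implementation with a ScrapperProfile that is registered in META-INF/services/org.rts.base.ScrapperProfile; a minimal sketch mirroring the bundled profile classes:
<pre>{@code
public class ExampleProfile implements ScrapperProfile {

    public String getName() {
        // this name is the key that ScrapperProfileRegistry matches against the "profile" property
        return "Example";
    }

    public Scrapper newInstance(Properties properties) {
        // the bundled profiles delegate to an existing Scrapper implementation such as PastieImpl
        PastieImpl pastie = new PastieImpl();
        pastie.initScrapper(properties);
        return pastie;
    }
}
}</pre>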
9 | */ 10 | 11 | public interface ScrapperProfile { 12 | 13 | // This function is used to get the name of 14 | public String getName(); 15 | 16 | Scrapper newInstance(Properties properties); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /rts-base/src/main/java/org/rts/base/exceptions/ScrapperNotvalidException.java: -------------------------------------------------------------------------------- 1 | package org.rts.base.exceptions; 2 | 3 | public class ScrapperNotvalidException extends RuntimeException{ 4 | public ScrapperNotvalidException() 5 | { 6 | 7 | } 8 | 9 | } 10 | -------------------------------------------------------------------------------- /rts-base/src/main/java/org/rts/base/profileregistry/ScrapperProfileRegistry.java: -------------------------------------------------------------------------------- 1 | package org.rts.base.profileregistry; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import java.util.Properties; 6 | import java.util.ServiceLoader; 7 | 8 | import org.rts.base.Scrapper; 9 | import org.rts.base.ScrapperProfile; 10 | 11 | public class ScrapperProfileRegistry { 12 | 13 | static Map scrapperProfileMap = new HashMap(); 14 | 15 | static { 16 | //This is a service loader used to load all scanning profiless 17 | ServiceLoader loadSP = ServiceLoader.load(ScrapperProfile.class, ScrapperProfile.class.getClassLoader()); 18 | for (ScrapperProfile scrapperProfile : loadSP) { 19 | scrapperProfileMap.put(scrapperProfile.getName(), scrapperProfile); 20 | } 21 | } 22 | 23 | public static Scrapper newScrapProfile(String profile,Properties properties) 24 | { 25 | ScrapperProfile sp=scrapperProfileMap.get(profile); 26 | if (sp==null) 27 | { 28 | throw new RuntimeException("ScannerProfile "+profile+" is not defined"); 29 | } 30 | return sp.newInstance(properties); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /rts-base/src/main/java/org/rts/base/utilities/PropertyUtilities.java: -------------------------------------------------------------------------------- 1 | package org.rts.base.utilities; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import java.util.Properties; 6 | import java.util.regex.Pattern; 7 | 8 | 9 | public class PropertyUtilities { 10 | private static final Pattern DOT_REGEXP = Pattern.compile("\\."); 11 | 12 | public static Properties filterAndShiftByFirstDot(Properties p, String prefix) { 13 | Properties result = new Properties(); 14 | for (String key : p.stringPropertyNames()) { 15 | String[] split = splitByFirstDot(key); 16 | String kfirst=split[0]; 17 | if (prefix.equals(kfirst)) { 18 | String knew = split.length > 1 ? split[1] : ""; 19 | result.setProperty(knew, p.getProperty(key)); 20 | } 21 | } 22 | return result; 23 | } 24 | 25 | public static Map propertiesGroupByFirstDot(Properties p) { 26 | HashMap result = new HashMap(); 27 | for (String key : p.stringPropertyNames()) { 28 | String[] split = splitByFirstDot(key); 29 | String kfirst=split[0]; 30 | String knew=split.length>1 ? 
split[1] : ""; 31 | Properties vp; 32 | if (result.containsKey(kfirst)) { 33 | vp = result.get(kfirst); 34 | } else { 35 | vp = new Properties(); 36 | result.put(kfirst, vp); 37 | } 38 | vp.setProperty(knew, p.getProperty(key)); 39 | } 40 | return result; 41 | } 42 | 43 | public static String[] splitByFirstDot(String s) { 44 | return DOT_REGEXP.split(s, 2); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /rts-base/src/test/java/org/rts/base/AppTest.java: -------------------------------------------------------------------------------- 1 | package org.rts.base; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /rts-impl/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /rts-impl/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /rts-impl/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | rts-impl 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /rts-impl/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/main/resources=UTF-8 4 | encoding//src/test/java=UTF-8 5 | encoding/=UTF-8 6 | -------------------------------------------------------------------------------- /rts-impl/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /rts-impl/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.rts.scrap 7 | rts 8 | 1.0-SNAPSHOT 9 | 10 | com.rts.scrap 11 | rts-impl 12 | 1.0-SNAPSHOT 13 | rts-impl 14 | 15 | 16 | http://maven.apache.org 17 | 18 | UTF-8 19 | 20 | 21 | 22 | 23 | junit 24 | junit 25 | 4.9 26 | test 27 | 28 | 29 | 30 | log4j 31 | log4j 32 | 1.2.17 33 | 34 | 35 | commons-cli 36 | commons-cli 37 | 1.4 38 | 39 | 40 | com.github.jreddit 41 | jreddit 42 | 
1.0.3 43 | 44 | 45 | com.googlecode.json-simple 46 | json-simple 47 | 1.1 48 | 49 | 50 | com.rts.scrap 51 | kafka-parser 52 | 1.0-SNAPSHOT 53 | 54 | 55 | 56 | 57 | com.rts.scrap 58 | rts-base 59 | 1.0-SNAPSHOT 60 | 61 | 62 | org.twitter4j 63 | twitter4j-stream 64 | 4.0.6 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/impl/GithubImpl.java: -------------------------------------------------------------------------------- 1 | package org.rts.impl; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.HashSet; 6 | import java.util.Properties; 7 | import java.util.Set; 8 | 9 | import org.apache.log4j.Logger; 10 | import org.kafkaparser.utilities.DbUtil; 11 | import org.kafkaparser.utilities.EmailUtility; 12 | import org.rts.base.Scrapper; 13 | import org.rts.utilities.JsonParserForGithub; 14 | import org.rts.utilities.TruffleHog; 15 | import org.sqlite.dataaccess.entity.SearchItem; 16 | import org.sqlite.dataaccess.util.DaoUtil; 17 | 18 | public class GithubImpl implements Scrapper { 19 | 20 | private String baseurl; 21 | private String access_token; 22 | private String timetoSleep; 23 | private ArrayList searchTerms=new ArrayList(); 24 | final static Logger logger = Logger.getLogger(GithubImpl.class); 25 | private String profile =""; 26 | private String trufflehogregex="false"; 27 | private String trufflehogentropy="false"; 28 | 29 | public void run() { 30 | // TODO Auto-generated method stub 31 | kickOffActualWork(); 32 | 33 | } 34 | 35 | public void initScrapper(Properties prop) 36 | { 37 | // TODO Auto-generated method stub 38 | this.baseurl=prop.getProperty("baseurl"); 39 | this.access_token=prop.getProperty("access_token"); 40 | this.timetoSleep=prop.getProperty("timetosleep"); 41 | this.searchTerms=new ArrayList(Arrays.asList(prop.getProperty("searchterms").split("\\s*,\\s*"))); 42 | this.profile= prop.getProperty("profile"); 43 | this.trufflehogregex=prop.getProperty("trufflehogregex").toLowerCase(); 44 | this.trufflehogentropy=prop.getProperty("trufflehogentropy").toLowerCase(); 45 | } 46 | 47 | public void kickOffActualWork() { 48 | System.out.println("Kicked off github"); 49 | 50 | 51 | while(true) 52 | { 53 | try { 54 | for (String searchTerm : searchTerms) 55 | { 56 | Set alertSet=JsonParserForGithub.githubUrlFetcher(baseurl.replace("{searchTerm}", searchTerm.replace(" ", "%20"))+"&access_token="+access_token); 57 | System.out.println("Got url" + alertSet); 58 | ArrayList threads= new ArrayList<>(); 59 | if(trufflehogregex.equals("true") || trufflehogentropy.equals("true")) 60 | { 61 | for(String url:alertSet) 62 | { 63 | 64 | { 65 | 66 | if(!DaoUtil.searchDuplicateByUrl(url)) 67 | { 68 | System.out.println("Analyzing url************" + url); 69 | TruffleHog truffleHogThread = new TruffleHog(); 70 | truffleHogThread.initilaize(url, searchTerm,profile,trufflehogregex,trufflehogentropy); 71 | Thread t = new Thread(truffleHogThread); 72 | threads.add(t); 73 | t.start(); 74 | //while(t.isAlive());//This is a bad idea waiting for every thread man 75 | } 76 | } 77 | 78 | } 79 | } 80 | 81 | else if(alertSet.size()>0) 82 | { 83 | Set filteredalertSet = new HashSet(); 84 | for(String url:alertSet) 85 | { 86 | if(!DaoUtil.searchDuplicateByUrl(url)) 87 | { 88 | //System.out.println("Comparing url" + url); 89 | filteredalertSet.add(url); 90 | } 91 | } 92 | EmailUtility.sendEmailUsingGmail(profile, filteredalertSet, searchTerm); 93 | for(String url:filteredalertSet) 94 
| { 95 | if(!DaoUtil.searchDuplicateByUrl(url)) 96 | { 97 | final Set temp=new HashSet(); 98 | final SearchItem searchItem = new SearchItem(); 99 | searchItem.setSearchItem(searchTerm); 100 | temp.add(searchItem); 101 | DbUtil.addNewEntry(temp, url,profile); 102 | 103 | } 104 | } 105 | } 106 | 107 | 108 | 109 | 110 | /**if(filteredalertSet.size()>0) 111 | { 112 | //System.out.println("Reuqired terms have been found"); 113 | EmailUtility.sendEmailUsingGmail(profile, filteredalertSet, searchterm); 114 | for(String url:filteredalertSet) 115 | { 116 | if(!DaoUtil.searchDuplicateByUrl(url)) 117 | { 118 | ArrayList temp=new ArrayList(); 119 | temp.add(searchterm); 120 | DbUtil.addNewEntry(temp, url,profile); 121 | 122 | } 123 | } 124 | }**/ 125 | } 126 | Thread.sleep(Integer.parseInt(timetoSleep)); 127 | } catch (Exception e) { 128 | logger.error("kickOffActualWork function in GithubImpl class has thrown exception",e); 129 | } 130 | 131 | System.gc(); 132 | 133 | } 134 | 135 | } 136 | 137 | public boolean stopScrapper() { 138 | // TODO Auto-generated method stub 139 | //enable multiple threads to speed up the process man 140 | return false; 141 | } 142 | 143 | public boolean restart() { 144 | // TODO Auto-generated method stub 145 | return false; 146 | } 147 | 148 | public static void main(String args[]) 149 | { 150 | Thread t=new Thread(new GithubImpl()); 151 | t.run(); 152 | } 153 | 154 | } 155 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/impl/PastieImpl.java: -------------------------------------------------------------------------------- 1 | package org.rts.impl; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.Arrays; 6 | import java.util.HashSet; 7 | import java.util.Iterator; 8 | import java.util.Properties; 9 | import java.util.Queue; 10 | import java.util.Set; 11 | import org.apache.log4j.Logger; 12 | import com.google.common.collect.EvictingQueue; 13 | import org.kafkaparser.base.Producer; 14 | import org.rts.base.Scrapper; 15 | import org.kafkaparser.utilities.ConfigData; 16 | import org.kafkaparser.utilities.PastieParseAndSearch; 17 | import org.kafkaparser.pojo.Data; 18 | 19 | public class PastieImpl implements Scrapper{ 20 | 21 | private String homeurl; 22 | private String regex; 23 | private String downloadurl; 24 | private String timetoSleep; 25 | public Queue evictingQueue= EvictingQueue.create(600); 26 | public Set previousSet = new HashSet(); 27 | public Set presentSet = new HashSet(); 28 | public Set diffSet = new HashSet(); 29 | private ArrayList searchTerms=new ArrayList(); 30 | final static Logger logger = Logger.getLogger(PastieImpl.class); 31 | private String profile=""; 32 | private String trufflehogregex="false"; 33 | private String trufflehogentropy="false"; 34 | 35 | public void initScrapper(Properties prop) { 36 | // TODO Auto-generated method stub 37 | //write a common function to check values of all the variables that it is populated in the config proeprties file man 38 | //maintain previous test ones and compare with present ones before making any requests again man 39 | this.regex=prop.getProperty("regex"); 40 | this.downloadurl=prop.getProperty("downloadurl"); 41 | this.homeurl=prop.getProperty("homeurl"); 42 | this.timetoSleep=prop.getProperty("timetosleep"); 43 | this.searchTerms=new ArrayList(Arrays.asList(prop.getProperty("searchterms").split("\\s*,\\s*"))); 44 | this.profile=prop.getProperty("profile"); 45 | 
this.trufflehogregex=prop.getProperty("trufflehogregex").toLowerCase(); 46 | this.trufflehogentropy=prop.getProperty("trufflehogentropy").toLowerCase(); 47 | } 48 | 49 | public void run() { 50 | System.out.println("Kicked off "+profile); 51 | try { 52 | while (true) 53 | { 54 | kickOffActualWork(); 55 | Thread.sleep(Integer.parseInt(timetoSleep)); 56 | System.gc(); 57 | } 58 | } catch (InterruptedException e) { 59 | // TODO Auto-generated catch block 60 | logger.error("Pastie Impl run has failed:",e); 61 | } 62 | } 63 | 64 | public void kickOffActualWork() { 65 | // TODO Auto-generated method stub 66 | 67 | ArrayList ids = null; 68 | try { 69 | ids = PastieParseAndSearch.fetchids(homeurl,regex); 70 | presentSet = new HashSet(ids); 71 | //diffSet=Difference.getDiff(previousSet, presentSet); 72 | //previousSet = presentSet; 73 | Iterator iter = presentSet.iterator(); 74 | while (iter.hasNext()) 75 | { String id=(String)iter.next(); 76 | if(!evictingQueue.contains(id)) 77 | { 78 | Producer.send(new Data(downloadurl.replace("{id}", id),searchTerms,profile,trufflehogregex,trufflehogentropy), ConfigData.topicName); 79 | } 80 | 81 | evictingQueue.add(id); 82 | } 83 | } 84 | catch (IOException e) { 85 | // TODO Auto-generated catch block 86 | logger.error("Pastie Impl kickoff has failed:",e); 87 | } catch (InterruptedException e) { 88 | // TODO Auto-generated catch block 89 | e.printStackTrace(); 90 | } 91 | } 92 | 93 | public boolean stopScrapper() { 94 | // TODO Auto-generated method stub 95 | return false; 96 | } 97 | 98 | public boolean restart() { 99 | // TODO Auto-generated method stub 100 | return false; 101 | } 102 | 103 | } 104 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/impl/RedditImpl.java: -------------------------------------------------------------------------------- 1 | package org.rts.impl; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.HashSet; 6 | import java.util.Iterator; 7 | import java.util.Properties; 8 | import java.util.Set; 9 | import org.rts.base.Scrapper; 10 | import org.kafkaparser.utilities.DbUtil; 11 | import org.kafkaparser.utilities.EmailUtility; 12 | import org.rts.utilities.JsonParserForReddit; 13 | import org.rts.utilities.TruffleHog; 14 | import org.sqlite.dataaccess.entity.SearchItem; 15 | import org.sqlite.dataaccess.util.DaoUtil; 16 | 17 | public class RedditImpl implements Scrapper { 18 | 19 | // implement array of searchterms needed here man 20 | private String baseurl; 21 | private String timetoSleep; 22 | private ArrayList searchTerms = new ArrayList(); 23 | private String profile = ""; 24 | private String trufflehogregex = "false"; 25 | private String trufflehogentropy = "false"; 26 | 27 | public void run() { 28 | // TODO Auto-generated method stub 29 | kickOffActualWork(); 30 | 31 | } 32 | 33 | public void initScrapper(Properties prop) { 34 | this.baseurl = prop.getProperty("baseurl"); 35 | this.timetoSleep = prop.getProperty("timetosleep"); 36 | this.searchTerms = new ArrayList(Arrays.asList(prop.getProperty("searchterms").split("\\s*,\\s*"))); 37 | this.profile = prop.getProperty("profile"); 38 | this.trufflehogregex = prop.getProperty("trufflehogregex").toLowerCase(); 39 | this.trufflehogentropy = prop.getProperty("trufflehogentropy").toLowerCase(); 40 | 41 | } 42 | 43 | public void kickOffActualWork() { 44 | while (true) { 45 | try { 46 | for (String searchTerm : searchTerms) { 47 | Set alertSet = JsonParserForReddit 48 | 
.redditUrlFetcher(baseurl.replace("{searchterm}", searchTerm.replace(" ", "%20"))); 49 | ArrayList threads = new ArrayList<>(); 50 | if (trufflehogregex.equals("true") || trufflehogentropy.equals("true")) { 51 | for (String url : alertSet) { 52 | 53 | { 54 | 55 | if (!DaoUtil.searchDuplicateByUrl(url)) { 56 | System.out.println("Analyzing url************" + url); 57 | TruffleHog truffleHogThread = new TruffleHog(); 58 | truffleHogThread.initilaize(url, searchTerm, profile, trufflehogregex, 59 | trufflehogentropy); 60 | Thread t = new Thread(truffleHogThread); 61 | threads.add(t); 62 | t.start(); 63 | } 64 | } 65 | 66 | } 67 | } 68 | 69 | else if (alertSet.size() > 0) { 70 | Set filteredalertSet = new HashSet(); 71 | for (String url : alertSet) { 72 | if (!DaoUtil.searchDuplicateByUrl(url)) { 73 | // System.out.println("Comparing url" + url); 74 | filteredalertSet.add(url); 75 | } 76 | } 77 | EmailUtility.sendEmailUsingGmail(profile, filteredalertSet, searchTerm); 78 | for (String url : filteredalertSet) { 79 | if (!DaoUtil.searchDuplicateByUrl(url)) { 80 | final Set temp = new HashSet(); 81 | final SearchItem searchItem = new SearchItem(); 82 | searchItem.setSearchItem(searchTerm); 83 | temp.add(searchItem); 84 | DbUtil.addNewEntry(temp, url, profile); 85 | 86 | } 87 | } 88 | } 89 | 90 | } 91 | Thread.sleep(Integer.parseInt(timetoSleep)); 92 | } catch (Exception e) { 93 | // TODO Auto-generated catch block 94 | e.printStackTrace(); 95 | } 96 | System.gc(); 97 | } 98 | 99 | } 100 | 101 | public boolean stopScrapper() { 102 | // TODO Auto-generated method stub 103 | return false; 104 | } 105 | 106 | public boolean restart() { 107 | // TODO Auto-generated method stub 108 | return false; 109 | } 110 | 111 | } 112 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/CodepadProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | 5 | import org.rts.base.Scrapper; 6 | import org.rts.base.ScrapperProfile; 7 | import org.rts.impl.PastieImpl; 8 | 9 | public class CodepadProfile implements ScrapperProfile{ 10 | 11 | public String getName() { 12 | // TODO Auto-generated method stub 13 | return "Codepad"; 14 | } 15 | 16 | public Scrapper newInstance(Properties properties) { 17 | // TODO Auto-generated method stub 18 | PastieImpl pastie=new PastieImpl(); 19 | pastie.initScrapper(properties); 20 | return pastie; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/Dumpz.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | 5 | import org.rts.base.Scrapper; 6 | import org.rts.base.ScrapperProfile; 7 | import org.rts.impl.PastieImpl; 8 | 9 | public class Dumpz implements ScrapperProfile{ 10 | 11 | public String getName() { 12 | // TODO Auto-generated method stub 13 | return "Dumpzorg"; 14 | } 15 | 16 | public Scrapper newInstance(Properties properties) { 17 | // TODO Auto-generated method stub 18 | PastieImpl pastie=new PastieImpl(); 19 | pastie.initScrapper(properties); 20 | return pastie; 21 | } 22 | 23 | } 24 | 25 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/GistGithubProfile.java: 
-------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | import org.rts.base.Scrapper; 5 | import org.rts.base.ScrapperProfile; 6 | import org.rts.impl.PastieImpl; 7 | 8 | public class GistGithubProfile implements ScrapperProfile{ 9 | 10 | public String getName() { 11 | // TODO Auto-generated method stub 12 | return "GistGithub"; 13 | } 14 | 15 | public Scrapper newInstance(Properties properties) { 16 | // TODO Auto-generated method stub 17 | PastieImpl pastie=new PastieImpl(); 18 | pastie.initScrapper(properties); 19 | return pastie; 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/GithubProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | import org.rts.base.Scrapper; 5 | import org.rts.base.ScrapperProfile; 6 | import org.rts.impl.GithubImpl; 7 | 8 | public class GithubProfile implements ScrapperProfile{ 9 | 10 | public String getName() { 11 | // TODO Auto-generated method stub 12 | return "Github"; 13 | } 14 | 15 | public Scrapper newInstance(Properties properties) { 16 | // TODO Auto-generated method stub 17 | 18 | GithubImpl gitHub=new GithubImpl(); 19 | gitHub.initScrapper(properties); 20 | return gitHub; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/IdeonecomProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | import org.rts.base.Scrapper; 5 | import org.rts.base.ScrapperProfile; 6 | import org.rts.impl.PastieImpl; 7 | 8 | public class IdeonecomProfile implements ScrapperProfile { 9 | 10 | public String getName() { 11 | // TODO Auto-generated method stub 12 | return "Ideonecom"; 13 | } 14 | 15 | public Scrapper newInstance(Properties properties) { 16 | // TODO Auto-generated method stub 17 | PastieImpl pastie=new PastieImpl(); 18 | pastie.initScrapper(properties); 19 | return pastie; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/KpastenetProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | import org.rts.base.Scrapper; 5 | import org.rts.base.ScrapperProfile; 6 | import org.rts.impl.PastieImpl; 7 | 8 | public class KpastenetProfile implements ScrapperProfile{ 9 | 10 | public String getName() { 11 | // TODO Auto-generated method stub 12 | return "Kpastenet"; 13 | } 14 | 15 | public Scrapper newInstance(Properties properties) { 16 | // TODO Auto-generated method stub 17 | PastieImpl pastie=new PastieImpl(); 18 | pastie.initScrapper(properties); 19 | return pastie; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/Lpaste.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | 5 | import org.rts.base.Scrapper; 6 | import org.rts.base.ScrapperProfile; 7 | import org.rts.impl.PastieImpl; 8 | 9 | public class Lpaste implements ScrapperProfile{ 10 | 11 | public String 
getName() { 12 | // TODO Auto-generated method stub 13 | return "Lpaste"; 14 | } 15 | 16 | public Scrapper newInstance(Properties properties) { 17 | // TODO Auto-generated method stub 18 | PastieImpl pastie=new PastieImpl(); 19 | pastie.initScrapper(properties); 20 | return pastie; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/PastebincaProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | import org.rts.base.Scrapper; 5 | import org.rts.base.ScrapperProfile; 6 | import org.rts.impl.PastieImpl; 7 | 8 | public class PastebincaProfile implements ScrapperProfile{ 9 | public String getName() { 10 | // TODO Auto-generated method stub 11 | //check with time if pastebin.ca comess up with different results browsers and mine are not matching.. need to identify what is that extra parameter needed man 12 | return "Pastebinca"; 13 | } 14 | 15 | public Scrapper newInstance(Properties properties) { 16 | // TODO Auto-generated method stub 17 | PastieImpl pastie=new PastieImpl(); 18 | pastie.initScrapper(properties); 19 | return pastie; 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/PastebinfrProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | import org.rts.base.Scrapper; 5 | import org.rts.base.ScrapperProfile; 6 | import org.rts.impl.PastieImpl; 7 | 8 | public class PastebinfrProfile implements ScrapperProfile{ 9 | 10 | public String getName() { 11 | // TODO Auto-generated method stub 12 | return "Pastebinfr"; 13 | } 14 | 15 | public Scrapper newInstance(Properties properties) { 16 | // TODO Auto-generated method stub 17 | PastieImpl pastie=new PastieImpl(); 18 | pastie.initScrapper(properties); 19 | return pastie; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/PastebinruProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | import org.rts.base.Scrapper; 5 | import org.rts.base.ScrapperProfile; 6 | import org.rts.impl.PastieImpl; 7 | 8 | public class PastebinruProfile implements ScrapperProfile { 9 | 10 | public String getName() { 11 | // TODO Auto-generated method stub 12 | return "Pastebinru"; 13 | } 14 | 15 | public Scrapper newInstance(Properties properties) { 16 | // TODO Auto-generated method stub 17 | PastieImpl pastie=new PastieImpl(); 18 | pastie.initScrapper(properties); 19 | return pastie; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/Pasteorgru.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | 5 | import org.rts.base.Scrapper; 6 | import org.rts.base.ScrapperProfile; 7 | import org.rts.impl.PastieImpl; 8 | 9 | public class Pasteorgru implements ScrapperProfile { 10 | 11 | public String getName() { 12 | // TODO Auto-generated method stub 13 | return "Pasteorgru"; 14 | } 15 | 16 | public Scrapper newInstance(Properties properties) { 17 | // TODO Auto-generated 
method stub 18 | PastieImpl pastie=new PastieImpl(); 19 | pastie.initScrapper(properties); 20 | return pastie; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/PastieProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | 5 | import org.rts.base.Scrapper; 6 | import org.rts.base.ScrapperProfile; 7 | import org.rts.impl.PastieImpl; 8 | 9 | public class PastieProfile implements ScrapperProfile{ 10 | 11 | public String getName() { 12 | // TODO Auto-generated method stub 13 | return "Pastie"; 14 | } 15 | 16 | public Scrapper newInstance(Properties properties) { 17 | // TODO Auto-generated method stub 18 | PastieImpl pastie=new PastieImpl(); 19 | pastie.initScrapper(properties); 20 | return pastie; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/RedditProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | 5 | import org.rts.base.Scrapper; 6 | import org.rts.base.ScrapperProfile; 7 | import org.rts.impl.RedditImpl; 8 | 9 | public class RedditProfile implements ScrapperProfile { 10 | 11 | public String getName() { 12 | // TODO Auto-generated method stub 13 | return "Reddit"; 14 | } 15 | 16 | public Scrapper newInstance(Properties properties) { 17 | // TODO Auto-generated method stub 18 | RedditImpl reddit=new RedditImpl(); 19 | reddit.initScrapper(properties); 20 | return reddit; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/SlexyOrgProfile.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | 5 | import org.rts.base.Scrapper; 6 | import org.rts.base.ScrapperProfile; 7 | import org.rts.impl.PastieImpl; 8 | 9 | public class SlexyOrgProfile implements ScrapperProfile{ 10 | 11 | 12 | public String getName() { 13 | // TODO Auto-generated method stub 14 | return "Slexyorg"; 15 | } 16 | 17 | public Scrapper newInstance(Properties properties) { 18 | // TODO Auto-generated method stub 19 | PastieImpl pastie=new PastieImpl(); 20 | pastie.initScrapper(properties); 21 | return pastie; 22 | } 23 | 24 | 25 | } 26 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/Snipplr.java: -------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | 5 | import org.rts.base.Scrapper; 6 | import org.rts.base.ScrapperProfile; 7 | import org.rts.impl.PastieImpl; 8 | 9 | public class Snipplr implements ScrapperProfile{ 10 | 11 | public String getName() { 12 | // TODO Auto-generated method stub 13 | return "Snipplr"; 14 | } 15 | 16 | public Scrapper newInstance(Properties properties) { 17 | // TODO Auto-generated method stub 18 | PastieImpl pastie=new PastieImpl(); 19 | pastie.initScrapper(properties); 20 | return pastie; 21 | } 22 | 23 | } 24 | 25 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/rtsprofile/TwitterProfile.java: 
-------------------------------------------------------------------------------- 1 | package org.rts.rtsprofile; 2 | 3 | import java.util.Properties; 4 | 5 | import org.rts.base.Scrapper; 6 | import org.rts.base.ScrapperProfile; 7 | import org.rts.impl.TwitterImpl; 8 | 9 | public class TwitterProfile implements ScrapperProfile{ 10 | 11 | public String getName() { 12 | // TODO Auto-generated method stub 13 | return "Twitter"; 14 | } 15 | 16 | public Scrapper newInstance(Properties properties) { 17 | // TODO Auto-generated method stub 18 | TwitterImpl twitter=new TwitterImpl(); 19 | twitter.initScrapper(properties); 20 | return twitter; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/utilities/Difference.java: -------------------------------------------------------------------------------- 1 | package org.rts.utilities; 2 | 3 | import java.util.Set; 4 | 5 | public class Difference 6 | { 7 | 8 | public static Set getDiff(Set previousSet,Set presentSet) 9 | { 10 | presentSet.removeAll(previousSet); 11 | return presentSet; 12 | } 13 | 14 | public static void main(String args []) 15 | { 16 | // Set test1 = new HashSet(); 17 | // test1.add("test1"); 18 | // test1.add("test2"); 19 | // test1.add("test3"); 20 | // test1.add("test5"); 21 | // test1.add("test6"); 22 | // 23 | // Set test2 = new HashSet(); 24 | // test2.add("test1"); 25 | // test2.add("test2"); 26 | // test2.add("test3"); 27 | // test2.add("test4"); 28 | // test2.add("test5"); 29 | // 30 | // Iterator i = getDiff(test1,test2).iterator(); 31 | // while (i.hasNext()) 32 | // { 33 | // String name = (String) i.next(); 34 | // System.out.println(name); 35 | // } 36 | 37 | } 38 | 39 | 40 | 41 | } 42 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/utilities/JsonParserForGithub.java: -------------------------------------------------------------------------------- 1 | 2 | package org.rts.utilities; 3 | 4 | import java.util.HashSet; 5 | import java.util.Iterator; 6 | import java.util.Queue; 7 | import java.util.Set; 8 | import org.json.simple.JSONArray; 9 | import org.json.simple.JSONObject; 10 | import org.json.simple.parser.JSONParser; 11 | import org.json.simple.parser.ParseException; 12 | 13 | import com.google.common.collect.EvictingQueue; 14 | import org.kafkaparser.utilities.HttpUtilities; 15 | 16 | public class JsonParserForGithub { 17 | 18 | public static JSONParser parser = new JSONParser(); 19 | public static Set previousSet = new HashSet(); 20 | public static Set presentSet; 21 | public static Set diffSet = new HashSet(); 22 | public static Queue evictingQueue= EvictingQueue.create(100); 23 | 24 | public static Set githubUrlFetcher(String url) throws ParseException, InterruptedException 25 | { 26 | Object obj = parser.parse(HttpUtilities.sendGet(url)); 27 | JSONObject jsonObject = (JSONObject) obj; 28 | JSONArray array = (JSONArray)jsonObject.get("items"); 29 | Iterator iterator = array.iterator(); 30 | presentSet = new HashSet(); 31 | while(iterator.hasNext()) 32 | { 33 | JSONObject eachJsonObject = (JSONObject) iterator.next(); 34 | JSONObject eachrepositoryObject = (JSONObject) eachJsonObject.get("repository"); 35 | String html_url=(String)eachrepositoryObject.get("html_url"); 36 | html_url=html_url+".git"; 37 | if(!evictingQueue.contains(html_url)) 38 | { 39 | //System.out.println("Github "+html_url); 40 | presentSet.add(html_url); 41 | } 42 | evictingQueue.add(html_url); 43 
| //System.out.println(eachJsonObject.get("html_url")); 44 | } 45 | //diffSet=Difference.getDiff(previousSet, presentSet); 46 | //previousSet=presentSet; 47 | return presentSet; 48 | } 49 | 50 | 51 | 52 | 53 | 54 | public static void main(String args []) throws InterruptedException 55 | { 56 | try { 57 | JsonParserForGithub.githubUrlFetcher("https://api.github.com/search/code?q=olacabs&sort=indexed&order=asc&access_token=ac2536a0beb59624d879e10084fe2d04404451bf&"); 58 | //JsonParser.githubUrlFetcher("https://pastebin.com/archive"); 59 | } catch (ParseException e) { 60 | // TODO Auto-generated catch block 61 | e.printStackTrace(); 62 | } 63 | } 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | } 72 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/utilities/JsonParserForReddit.java: -------------------------------------------------------------------------------- 1 | 2 | package org.rts.utilities; 3 | 4 | import java.util.HashSet; 5 | import java.util.Iterator; 6 | import java.util.Queue; 7 | import java.util.Set; 8 | 9 | import org.json.simple.JSONArray; 10 | import org.json.simple.JSONObject; 11 | import org.json.simple.parser.JSONParser; 12 | import org.json.simple.parser.ParseException; 13 | 14 | import com.google.common.collect.EvictingQueue; 15 | import org.kafkaparser.utilities.HttpUtilities; 16 | 17 | public class JsonParserForReddit { 18 | 19 | public static JSONParser parser = new JSONParser(); 20 | public static Set previousSet = new HashSet(); 21 | public static Set presentSet; 22 | public static Set diffSet = new HashSet(); 23 | public static Queue evictingQueue= EvictingQueue.create(100); 24 | 25 | public static Set redditUrlFetcher(String url) throws ParseException, InterruptedException 26 | { 27 | Object obj = parser.parse(HttpUtilities.sendGet(url)); 28 | JSONArray array = (JSONArray)((JSONObject) ((JSONObject) obj).get("data")).get("children"); 29 | @SuppressWarnings("unchecked") 30 | Iterator iterator = array.iterator(); 31 | presentSet = new HashSet(); 32 | while(iterator.hasNext()) 33 | { 34 | //Danger new fix is going on man 35 | JSONObject eachJsonObject = (JSONObject) iterator.next(); 36 | String redditUrl="https://www.reddit.com/"+(String)(((JSONObject)eachJsonObject.get("data")).get("permalink")); 37 | if(!evictingQueue.contains(redditUrl)) 38 | { 39 | presentSet.add(redditUrl); 40 | } 41 | evictingQueue.add(redditUrl); 42 | } 43 | return presentSet; 44 | } 45 | 46 | 47 | 48 | 49 | 50 | public static void main(String args []) throws InterruptedException 51 | { 52 | try { 53 | JsonParserForReddit.redditUrlFetcher("https://www.reddit.com/search.json?q=olacabs%20hacked"); 54 | //JsonParser.githubUrlFetcher("https://pastebin.com/archive"); 55 | } catch (ParseException e) { 56 | // TODO Auto-generated catch block 57 | e.printStackTrace(); 58 | } 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /rts-impl/src/main/java/org/rts/utilities/TruffleHog.java: -------------------------------------------------------------------------------- 1 | package org.rts.utilities; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | import java.lang.reflect.Field; 7 | import java.util.ArrayList; 8 | import java.util.HashSet; 9 | import java.util.Set; 10 | import java.util.concurrent.TimeUnit; 11 | import java.util.regex.Matcher; 12 | import java.util.regex.Pattern; 13 | 14 | import org.kafkaparser.utilities.ConfigData; 15 
| import org.kafkaparser.utilities.DbUtil; 16 | import org.kafkaparser.utilities.EmailUtility; 17 | import org.sqlite.dataaccess.entity.SearchItem; 18 | import org.sqlite.dataaccess.util.DaoUtil; 19 | 20 | public class TruffleHog implements Runnable { 21 | 22 | private static String regexForSecret = "stringsFound\": (.*)}"; 23 | 24 | private String link; 25 | private String searchTerm; 26 | private String profile; 27 | private String regex; 28 | private String entropy; 29 | private String pythonPath; 30 | private String trufflehogPath; 31 | 32 | public void initilaize(String pastielink, String searchTerm, String profile, String regex, String entropy) { 33 | this.link = pastielink; 34 | this.searchTerm = searchTerm; 35 | this.profile = profile; 36 | this.regex = regex; 37 | this.entropy = entropy; 38 | 39 | } 40 | 41 | public Set getSecrets() throws IOException, InterruptedException { 42 | 43 | System.out.println("*********Entered trufflehog"); 44 | 45 | final Set secrets = new HashSet(); 46 | 47 | if (regex.equals("false")) { 48 | regex = ""; 49 | } else { 50 | regex = "--regex"; 51 | } 52 | System.out.println(":::: Config PATH: "+ConfigData.pythonPath+ "- "+ ConfigData.trufflehogPath); 53 | String[] cmd = { 54 | // "/usr/local/bin/python2.7", 55 | // "/usr/bin/python2.7", 56 | // "/Users/n0r00ij/Downloads/truffleHog-dev/truffleHog/truffleHog/truffleHog.py", 57 | ConfigData.pythonPath, ConfigData.trufflehogPath, regex, "--cleanup", "--entropy=" + entropy, "--json", 58 | link }; 59 | 60 | if (!DaoUtil.searchDuplicateByUrl(link)) { 61 | Process p = Runtime.getRuntime().exec(cmd); 62 | 63 | // p.waitFor(); 64 | BufferedReader bri = new BufferedReader(new InputStreamReader(p.getInputStream())); 65 | BufferedReader bre = new BufferedReader(new InputStreamReader(p.getErrorStream())); 66 | String line; 67 | while ((line = bri.readLine()) != null) { 68 | // System.out.println(line); 69 | // System.out.println(); 70 | secrets.addAll(extractRegexMatches(line, regexForSecret)); 71 | 72 | } 73 | bri.close(); 74 | while ((line = bre.readLine()) != null) { 75 | // System.out.println(line); 76 | secrets.addAll(extractRegexMatches(line, regexForSecret)); 77 | 78 | } 79 | bre.close(); 80 | // Important decide if this is needed or remove it 81 | p.waitFor(45, TimeUnit.MINUTES); 82 | 83 | p.destroyForcibly(); 84 | 85 | // p.destroy(); 86 | 87 | } 88 | 89 | Boolean is_Valid = false; 90 | if (secrets.size() > 0) { 91 | Set temp = new HashSet(); 92 | temp.add(link); 93 | EmailUtility.sendEmailUsingGmail(profile, temp, searchTerm); 94 | is_Valid = true; 95 | } 96 | 97 | if (!DaoUtil.searchDuplicateByUrl(link)) { 98 | 99 | DbUtil.addNewEntry(secrets, link, profile, is_Valid); 100 | 101 | } 102 | // System.out.println("*********Done"); 103 | 104 | return secrets; 105 | } 106 | 107 | public static Set extractRegexMatches(String line, String regex) { 108 | final Set matchSet = new HashSet(); 109 | Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE); 110 | Matcher matcher = pattern.matcher(line); 111 | while (matcher.find()) { 112 | final SearchItem searchItem = new SearchItem(); 113 | searchItem.setSearchItem(matcher.group(1)); 114 | matchSet.add(searchItem); 115 | } 116 | return matchSet; 117 | } 118 | 119 | @Override 120 | public void run() { 121 | // TODO Auto-generated method stub 122 | try { 123 | getSecrets(); 124 | } catch (IOException | InterruptedException e) { 125 | // TODO Auto-generated catch block 126 | e.printStackTrace(); 127 | } 128 | 129 | } 130 | 131 | /** 132 | * public static void 
main(String args[]) { try { 133 | * System.out.println(getSecrets("https://github.com/cogdog/tweets.git")); } 134 | * catch (IOException e) { // TODO Auto-generated catch block 135 | * e.printStackTrace(); } catch (InterruptedException e) { // TODO 136 | * Auto-generated catch block e.printStackTrace(); } } 137 | **/ 138 | 139 | } 140 | -------------------------------------------------------------------------------- /rts-impl/src/main/resources/META-INF/services/org.rts.base.ScrapperProfile: -------------------------------------------------------------------------------- 1 | org.rts.rtsprofile.TwitterProfile 2 | org.rts.rtsprofile.PastieProfile 3 | org.rts.rtsprofile.GithubProfile 4 | org.rts.rtsprofile.RedditProfile 5 | org.rts.rtsprofile.GistGithubProfile 6 | org.rts.rtsprofile.IdeonecomProfile 7 | org.rts.rtsprofile.KpastenetProfile 8 | org.rts.rtsprofile.PastebincaProfile 9 | org.rts.rtsprofile.PastebinfrProfile 10 | org.rts.rtsprofile.SlexyOrgProfile 11 | org.rts.rtsprofile.CodepadProfile 12 | org.rts.rtsprofile.Pasteorgru 13 | org.rts.rtsprofile.Lpaste 14 | org.rts.rtsprofile.Dumpz 15 | org.rts.rtsprofile.Snipplr -------------------------------------------------------------------------------- /rts-impl/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Root logger option 2 | log4j.rootLogger=error, stdout, file 3 | 4 | # Redirect log messages to console 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.Target=System.out 7 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n 9 | 10 | # Redirect log messages to a log file, support file rolling. 11 | log4j.appender.file=org.apache.log4j.RollingFileAppender 12 | log4j.appender.file.File=RTS.log 13 | log4j.appender.file.MaxFileSize=5MB 14 | log4j.appender.file.MaxBackupIndex=10 15 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n -------------------------------------------------------------------------------- /rts-impl/src/test/java/org/rts/impl/AppTest.java: -------------------------------------------------------------------------------- 1 | package org.rts.impl; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 
9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /scrapper_config/consumer.properties: -------------------------------------------------------------------------------- 1 | bootstrap.servers=localhost:9092 2 | group.id=test 3 | enable.auto.commit=true 4 | auto.commit.interval.ms=1000 5 | session.timeout.ms=300000 6 | request.timeout.ms=500000 7 | auto.offset.reset=earliest 8 | key.deserializer=org.apache.kafka.common.serialization.StringDeserializer 9 | value.deserializer=org.kafkaparser.deseralize.DataDeserializer 10 | # These buffer sizes seem to be needed to avoid consumer switching to 11 | # a mode where it processes one bufferful every 5 seconds with multiple 12 | # timeouts along the way. No idea why this happens. 13 | fetch.min.bytes=50000 14 | receive.buffer.bytes=262144 15 | max.partition.fetch.bytes=2097152 -------------------------------------------------------------------------------- /scrapper_config/email.properties: -------------------------------------------------------------------------------- 1 | from-email = invited.tomail@gmail.com 2 | to-email = naveen.rudra02@gmail.com 3 | password = Padma-18 4 | mail.smtp.auth= true 5 | mail.smtp.starttls.enable =true 6 | mail.smtp.host = smtp.gmail.com 7 | mail.smtp.port = 587 8 | -------------------------------------------------------------------------------- /scrapper_config/global.properties: -------------------------------------------------------------------------------- 1 | pythonpath=/usr/bin/python2.7 2 | trufflehogpath=/home/n0r00ij/truffleHog/truffleHog/truffleHog.py 3 | #pythonpath=/usr/local/bin/python2.7 4 | #trufflehogpath=/Users/n0r00ij/Downloads/truffleHog/truffleHog/truffleHog.py 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /scrapper_config/look for ssrf via host header: -------------------------------------------------------------------------------- 1 | look for ssrf via host header 2 | 3 | Verbose error message: 4 | 1. Add a "Debug=true" directive at the top of the file that generated the error. 
Example: 5 | 6 | <%@ Page Language="C#" Debug="true" %> 7 | 8 | or: 9 | 10 | 2) Add the following section to the configuration file of your application: 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | Forceful browsing: 19 | https://suppliersourcing.qa.walmart.com/odc_selection.aspx 20 | 21 | Viewstate can be decryoted -------------------------------------------------------------------------------- /scrapper_config/producer.properties: -------------------------------------------------------------------------------- 1 | bootstrap.servers=localhost:9092 2 | acks=all 3 | retries=0 4 | batch.size=16384 5 | auto.commit.interval.ms=1000 6 | linger.ms=0 7 | key.serializer=org.apache.kafka.common.serialization.StringSerializer 8 | value.serializer=org.kafkaparser.serialize.DataSerializer 9 | block.on.buffer.full=true -------------------------------------------------------------------------------- /scrapper_config/proxy.properties: -------------------------------------------------------------------------------- 1 | proxy_ip=web-proxy.ind.hp.com 2 | #proxy_ip=127.0.0.1 3 | proxy_port=8080 -------------------------------------------------------------------------------- /scraptool/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scraptool/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /scraptool/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | scraptool 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /scraptool/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/test/java=UTF-8 4 | encoding/=UTF-8 5 | -------------------------------------------------------------------------------- /scraptool/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /scraptool/RTS.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/scraptool/RTS.db -------------------------------------------------------------------------------- /scraptool/RTS.log: -------------------------------------------------------------------------------- 1 | 2019-08-15 19:52:43 ERROR ScrapperTool:90 - Something wrong happened in scrappers 2 | java.lang.NullPointerException 3 | at org.rts.impl.PastieImpl.initScrapper(PastieImpl.java:45) 4 | at org.rts.rtsprofile.CodepadProfile.newInstance(CodepadProfile.java:19) 5 | at org.rts.base.profileregistry.ScrapperProfileRegistry.newScrapProfile(ScrapperProfileRegistry.java:30) 6 | at 
org.rts.base.ScrapperImpl.initialize(ScrapperImpl.java:51) 7 | at org.rts.base.ScrapperImpl.initialize(ScrapperImpl.java:38) 8 | at org.scraptool.ScrapperTool.initializeScrappers(ScrapperTool.java:40) 9 | at org.scraptool.ScrapperTool.startScrappers(ScrapperTool.java:80) 10 | at org.scraptool.ScrapperTool.main(ScrapperTool.java:127) 11 | 2019-08-15 19:54:17 ERROR ScrapperTool:90 - Something wrong happened in scrappers 12 | java.lang.NullPointerException 13 | at org.rts.impl.PastieImpl.initScrapper(PastieImpl.java:45) 14 | at org.rts.rtsprofile.CodepadProfile.newInstance(CodepadProfile.java:19) 15 | at org.rts.base.profileregistry.ScrapperProfileRegistry.newScrapProfile(ScrapperProfileRegistry.java:30) 16 | at org.rts.base.ScrapperImpl.initialize(ScrapperImpl.java:51) 17 | at org.rts.base.ScrapperImpl.initialize(ScrapperImpl.java:38) 18 | at org.scraptool.ScrapperTool.initializeScrappers(ScrapperTool.java:40) 19 | at org.scraptool.ScrapperTool.startScrappers(ScrapperTool.java:80) 20 | at org.scraptool.ScrapperTool.main(ScrapperTool.java:127) 21 | -------------------------------------------------------------------------------- /scraptool/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.rts.scrap 7 | rts 8 | 1.0-SNAPSHOT 9 | 10 | com.rts.scrap 11 | scraptool 12 | 1.0-SNAPSHOT 13 | scraptool 14 | http://maven.apache.org 15 | 16 | UTF-8 17 | 18 | 19 | 20 | log4j 21 | log4j 22 | 1.2.17 23 | 24 | 25 | commons-cli 26 | commons-cli 27 | 1.4 28 | 29 | 30 | com.google.guava 31 | guava 32 | 18.0 33 | 34 | 35 | com.rts.scrap 36 | rts-impl 37 | 1.0-SNAPSHOT 38 | 39 | 40 | junit 41 | junit 42 | 3.8.1 43 | test 44 | 45 | 46 | 47 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | org.apache.maven.plugins 67 | maven-shade-plugin 68 | 2.4 69 | 70 | 71 | 72 | package 73 | 74 | shade 75 | 76 | 77 | 78 | 79 | *:* 80 | 81 | META-INF/*.SF 82 | META-INF/*.DSA 83 | META-INF/*.RSA 84 | 85 | 86 | 87 | 88 | 89 | 91 | 92 | org.scraptool.ScrapperTool 93 | 1.0 94 | true 95 | 96 | 97 | 98 | 99 | true 100 | standalone 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /scraptool/src/main/java/org/scraptool/ScrapperTool.java: -------------------------------------------------------------------------------- 1 | package org.scraptool; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import java.util.Map.Entry; 8 | import org.kafkaparser.base.NotificationConsumerGroup; 9 | import org.kafkaparser.base.Producer; 10 | import org.rts.base.Scrapper; 11 | import org.rts.base.ScrapperImpl; 12 | import org.kafkaparser.utilities.ConfigData; 13 | 14 | import org.apache.log4j.Logger; 15 | import org.apache.commons.cli.*; 16 | 17 | public class ScrapperTool { 18 | 19 | static ScrapperImpl scrapperimpl; 20 | static HashMap allThreads = new HashMap(); 21 | final static Logger logger = Logger.getLogger(ScrapperTool.class); 22 | public static String configDirectory; 23 | public static String topicname; 24 | static File configDirectoryfile; 25 | 26 | public static void initializeScrappers(String configDirectory) 27 | { 28 | //proper error handling and null pointer exception is a must and code ahs to be written man 29 | //logger check for proeprties file befor ekicking off 30 | //check if internet connection is there,log and pause threads if no internet 31 | //logger check for inetrnet connnection 32 | //logger check for any otehr 
things like rate limiting 200ok issues and all 33 | //logger for email too 34 | 35 | System.out.println( "Initializing of scrapper has started!!" ); 36 | logger.info("Initializing of scrapper has started!!"); 37 | try{ 38 | scrapperimpl= ScrapperImpl.getInstance(); 39 | configDirectoryfile = new File(ConfigData.configDirectory); 40 | scrapperimpl.initialize(configDirectoryfile); 41 | Producer.initialize(configDirectoryfile); 42 | 43 | } 44 | catch (IOException e) { 45 | //logger.error("Iniltizlization of scrappers has failed", e); 46 | e.printStackTrace(); 47 | } 48 | } 49 | 50 | public static void checkThreadsStatus() throws InterruptedException 51 | { 52 | while (true) 53 | { 54 | for (Map.Entry entry : allThreads.entrySet()) 55 | { 56 | if(!entry.getValue().isAlive()) 57 | { 58 | System.out.println("Restrating the thread: "+entry.getKey()+" The reason being it not alive."); 59 | allThreads.remove(entry.getKey()); 60 | startThread(entry.getKey()); 61 | logger.error(entry.getKey()+"Thread has been restarted succeafully"); 62 | } 63 | } 64 | Thread.sleep(60000); 65 | } 66 | } 67 | 68 | public static void startThread(String threadname) 69 | { 70 | Scrapper profile=scrapperimpl.getScrapperMap().get(threadname); 71 | Thread profilethread=new Thread(profile); 72 | profilethread.start(); 73 | allThreads.put(threadname, profilethread); 74 | } 75 | 76 | public static void startScrappers() 77 | { 78 | 79 | try { 80 | initializeScrappers(configDirectory); 81 | for (Entry entry : scrapperimpl.getScrapperMap().entrySet()) 82 | { 83 | startThread(entry.getKey()); 84 | } 85 | NotificationConsumerGroup newgroup; 86 | newgroup = new NotificationConsumerGroup(5, ConfigData.topicName,configDirectoryfile); 87 | newgroup.execute(); 88 | //checkThreadsStatus(); 89 | } catch (Exception e) { 90 | logger.error("Something wrong happened in scrappers", e); 91 | e.printStackTrace(); 92 | } 93 | 94 | 95 | } 96 | 97 | public static void main(String args[]) 98 | { 99 | Options options = new Options(); 100 | 101 | Option input1 = new Option("c", "configDirectory", true, "configDirectory path"); 102 | input1.setRequired(true); 103 | options.addOption(input1); 104 | 105 | Option input2 = new Option("t", "topicname", true, "topicname of kafka"); 106 | input2.setRequired(true); 107 | options.addOption(input2); 108 | 109 | CommandLineParser parser = new DefaultParser(); 110 | HelpFormatter formatter = new HelpFormatter(); 111 | CommandLine cmd; 112 | 113 | try { 114 | cmd = parser.parse(options, args); 115 | } catch (ParseException e) { 116 | System.out.println(e.getMessage()); 117 | formatter.printHelp("Scrapper", options); 118 | System.exit(1); 119 | return; 120 | } 121 | 122 | ConfigData.configDirectory=cmd.getOptionValue("configDirectory"); 123 | ConfigData.topicName=cmd.getOptionValue("topicname"); 124 | //configDirectory = cmd.getOptionValue("configDirectory"); 125 | //topicname = cmd.getOptionValue("topicname"); 126 | 127 | ScrapperTool.startScrappers(); 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /scraptool/src/test/java/org/scraptool/AppTest.java: -------------------------------------------------------------------------------- 1 | package org.scraptool; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 
9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /script/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/script/.DS_Store -------------------------------------------------------------------------------- /script/cleanup.sh: -------------------------------------------------------------------------------- 1 | #delete topic 2 | /opt/kafka/bin/kafka-topics.sh --zookeeper localhost:2181 --delete --topic test 3 | 4 | sleep 2 5 | 6 | #stop kafka server 7 | /opt/kafka/bin/kafka-server-stop.sh 8 | 9 | sleep 5 10 | #stop zookeeper 11 | /opt/kafka/bin/zookeeper-server-stop.sh 12 | 13 | sleep 1 14 | 15 | rm -rf /tmp/kafka-logs 16 | rm -rf /opt/kafka/logs -------------------------------------------------------------------------------- /script/db_setup.sh: -------------------------------------------------------------------------------- 1 | echo "create database rttm COLLATE 'utf8_unicode_ci';" |mysql -uroot 2 | echo "CREATE USER 'admin'@'localhost' IDENTIFIED BY 'password';" |mysql -uroot 3 | echo "GRANT ALL PRIVILEGES ON *.* TO 'admin'@'localhost' WITH GRANT OPTION;" |mysql -uroot 4 | echo "CREATE USER 'admin'@'%' IDENTIFIED BY 'password';" |mysql -uroot 5 | echo "GRANT ALL PRIVILEGES ON *.* TO 'admin'@'%' WITH GRANT OPTION;" |mysql -uroot 6 | echo "SHOW GRANTS FOR admin;" |mysql -uroot 7 | echo "FLUSH PRIVILEGES;" |mysql -uroot -------------------------------------------------------------------------------- /script/initialize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Update server.properties when executing with delete.topic.enable=true 4 | # env var will be set when executing docker run 5 | 6 | 7 | # Start to run zookeeper as background process 8 | /opt/kafka/bin/zookeeper-server-start.sh /opt/kafka/config/zookeeper.properties & 9 | 10 | sleep 3 11 | 12 | # Start kafka server 13 | /opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/server.properties & 14 | 15 | sleep 4 16 | 17 | #Create a topic 18 | /opt/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test 19 | 20 | 21 | #Start mysql server 22 | service mysql start 23 | 24 | git config --global user.email "test@example.com" 25 | 26 | 27 | exit 28 | -------------------------------------------------------------------------------- /sqlite-dataaccess/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /sqlite-dataaccess/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 |
-------------------------------------------------------------------------------- /sqlite-dataaccess/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | sqlite-dataaccess 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /sqlite-dataaccess/.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/main/resources=UTF-8 4 | encoding//src/test/java=UTF-8 5 | encoding/=UTF-8 6 | -------------------------------------------------------------------------------- /sqlite-dataaccess/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /sqlite-dataaccess/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.rts.scrap 7 | rts 8 | 1.0-SNAPSHOT 9 | 10 | com.rts.scrap 11 | sqlite-dataaccess 12 | 1.0-SNAPSHOT 13 | sqlite-dataaccess 14 | http://maven.apache.org 15 | 16 | 17 | UTF-8 18 | 19 | 20 | 21 | 22 | org.hibernate.javax.persistence 23 | hibernate-jpa-2.1-api 24 | 1.0.2.Final 25 | 26 | 27 | 28 | org.hibernate 29 | hibernate-entitymanager 30 | 5.4.4.Final 31 | 32 | 33 | mysql 34 | mysql-connector-java 35 | 8.0.14 36 | 37 | 38 | org.xerial 39 | sqlite-jdbc 40 | 3.6.17 41 | test 42 | 43 | 44 | org.slf4j 45 | slf4j-log4j12 46 | 1.6.4 47 | 48 | 49 | 50 | log4j 51 | log4j 52 | 1.2.17 53 | 54 | 55 | junit 56 | junit 57 | 4.9 58 | test 59 | 60 | 61 | org.xerial 62 | sqlite-jdbc 63 | 3.21.0 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /sqlite-dataaccess/src/main/java/org/sqlite/dataaccess/entity/Result.java: -------------------------------------------------------------------------------- 1 | package org.sqlite.dataaccess.entity; 2 | 3 | import java.io.Serializable; 4 | import java.util.HashSet; 5 | import java.util.Set; 6 | 7 | import javax.persistence.CascadeType; 8 | import javax.persistence.Column; 9 | import javax.persistence.Entity; 10 | import javax.persistence.FetchType; 11 | import javax.persistence.Id; 12 | import javax.persistence.JoinColumn; 13 | import javax.persistence.JoinTable; 14 | import javax.persistence.Lob; 15 | import javax.persistence.ManyToMany; 16 | 17 | /** 18 | * 19 | * @author Josue R G Junior josueribeiro.jr@gmail.com 20 | */ 21 | @Entity 22 | public class Result implements Serializable { 23 | 24 | private static final long serialVersionUID = -7250234396452258822L; 25 | 26 | @Id 27 | @Column(name = "url",unique = true, updatable = false, nullable = false) 28 | private String url; 29 | private String time; 30 | private String searchedtext; 31 | @Column(length=1000000) 32 | @Lob 33 | @ManyToMany(fetch=FetchType.EAGER, cascade = {CascadeType.PERSIST, CascadeType.MERGE}) 34 | @JoinTable(name="result_search_item", joinColumns = { 35 | @JoinColumn(name = "url", nullable = false)}, 36 | inverseJoinColumns = { @JoinColumn(name = "id", 37 | nullable = false)} 38 | ) 
39 | private Set searchedItemSet = new HashSet(); 40 | @Column(name = "is_valid") 41 | private Boolean isValid; 42 | 43 | public Boolean getIsValid() { 44 | return isValid; 45 | } 46 | 47 | public void setIsValid(Boolean isValid) { 48 | this.isValid = isValid; 49 | } 50 | 51 | public String getBotName() { 52 | return botName; 53 | } 54 | 55 | public void setBotName(String botName) { 56 | this.botName = botName; 57 | } 58 | 59 | private String botName; 60 | 61 | public String getUrl() { 62 | return url; 63 | } 64 | 65 | public void setUrl(String url) { 66 | this.url = url; 67 | } 68 | 69 | public String getTime() { 70 | return time; 71 | } 72 | 73 | public void setTime(String time) { 74 | this.time = time; 75 | } 76 | 77 | public String getSearchedtext() { 78 | return searchedtext; 79 | } 80 | 81 | public void setSearchedtext(String searchedtext) { 82 | this.searchedtext = searchedtext; 83 | } 84 | 85 | @Override 86 | public int hashCode() { 87 | final int prime = 31; 88 | int result = 1; 89 | result = prime * result + ((time == null) ? 0 : time.hashCode()); 90 | result = prime * result + ((url == null) ? 0 : url.hashCode()); 91 | return result; 92 | } 93 | 94 | @Override 95 | public boolean equals(Object obj) { 96 | if (this == obj) 97 | return true; 98 | if (obj == null) 99 | return false; 100 | if (getClass() != obj.getClass()) 101 | return false; 102 | Result other = (Result) obj; 103 | if (time == null) { 104 | if (other.time != null) 105 | return false; 106 | } else if (!time.equals(other.time)) 107 | return false; 108 | if (url == null) { 109 | if (other.url != null) 110 | return false; 111 | } else if (!url.equals(other.url)) 112 | return false; 113 | return true; 114 | } 115 | 116 | // add one extra column from future perspective 117 | // add one extra column if it is false or true 118 | public Set getSearchedTerms() { 119 | return searchedItemSet; 120 | } 121 | 122 | public void setSearchedTerms(Set searchedItemSet) { 123 | this.searchedItemSet = searchedItemSet; 124 | } 125 | 126 | @Override 127 | public String toString() { 128 | return "Result [url=" + url + ", time=" + time + ", searchedtext=" + searchedtext + ", searchedItemSet=" 129 | + searchedItemSet + ", isValid=" + isValid + ", botName=" + botName + "]"; 130 | } 131 | 132 | } -------------------------------------------------------------------------------- /sqlite-dataaccess/src/main/java/org/sqlite/dataaccess/entity/SearchItem.java: -------------------------------------------------------------------------------- 1 | package org.sqlite.dataaccess.entity; 2 | 3 | import java.io.Serializable; 4 | import java.util.HashSet; 5 | import java.util.Set; 6 | 7 | import javax.persistence.Column; 8 | import javax.persistence.Entity; 9 | import javax.persistence.FetchType; 10 | import javax.persistence.GeneratedValue; 11 | import javax.persistence.GenerationType; 12 | import javax.persistence.Id; 13 | import javax.persistence.ManyToMany; 14 | import javax.persistence.Table; 15 | 16 | /** 17 | * 18 | * @author Sunny Sharma sunnysharmagts@gmail.com 19 | */ 20 | @Entity 21 | @Table(name = "search_item") 22 | public class SearchItem implements Serializable { 23 | 24 | @Id 25 | @GeneratedValue(strategy = GenerationType.IDENTITY) 26 | @Column(name = "id", unique = true, updatable=false, nullable = false) 27 | private Integer id; 28 | 29 | @Column(name = "search_term", unique = true, nullable = false) 30 | private String searchItem; 31 | 32 | @ManyToMany(fetch = FetchType.EAGER, mappedBy = "searchedItemSet") 33 | private Set resultSet = new 
HashSet(); 34 | 35 | public Integer getId() { 36 | return id; 37 | } 38 | 39 | public void setId(final int id) { 40 | this.id = id; 41 | } 42 | 43 | public String getSearchItem() { 44 | return searchItem; 45 | } 46 | 47 | public void setSearchItem(String searchItem) { 48 | this.searchItem = searchItem; 49 | } 50 | 51 | public Set getResult() { 52 | return this.resultSet; 53 | } 54 | 55 | public void setResult(Set resultSet) { 56 | this.resultSet = resultSet; 57 | } 58 | 59 | public void addResult(Result result) { 60 | this.resultSet.add(result); 61 | result.getSearchedTerms().add(this); 62 | } 63 | 64 | public void removeResult(Result result) { 65 | this.resultSet.remove(result); 66 | result.getSearchedTerms().remove(this); 67 | } 68 | 69 | @Override 70 | public String toString() { 71 | return "SearchItem [id=" + id + ", searchItem=" + searchItem + "]"; 72 | } 73 | } -------------------------------------------------------------------------------- /sqlite-dataaccess/src/main/java/org/sqlite/dataaccess/util/DaoUtil.java: -------------------------------------------------------------------------------- 1 | package org.sqlite.dataaccess.util; 2 | 3 | import java.text.DateFormat; 4 | import java.text.SimpleDateFormat; 5 | import java.util.ArrayList; 6 | import java.util.Date; 7 | import java.util.HashSet; 8 | import java.util.Set; 9 | 10 | import javax.persistence.TypedQuery; 11 | import javax.transaction.Transactional; 12 | 13 | import org.sqlite.dataaccess.entity.Result; 14 | import org.sqlite.dataaccess.entity.SearchItem; 15 | 16 | public class DaoUtil { 17 | 18 | static 19 | { 20 | EMfactory.setUp(); 21 | EMfactory.initEntityManager(); 22 | } 23 | 24 | @Transactional 25 | public synchronized static void insert(Result data) { 26 | try { 27 | EMfactory.em.getTransaction().begin(); 28 | EMfactory.em.persist(data); 29 | EMfactory.em.getTransaction().commit(); 30 | } catch(final Exception e) { 31 | e.printStackTrace(); 32 | } finally { 33 | //EMfactory.em.close(); 34 | } 35 | } 36 | 37 | @Transactional 38 | public synchronized static void merge(Result data) { 39 | try { 40 | EMfactory.em.getTransaction().begin(); 41 | EMfactory.em.merge(data); 42 | EMfactory.em.getTransaction().commit(); 43 | } catch(final Exception e) { 44 | e.printStackTrace(); 45 | } finally { 46 | //EMfactory.em.close(); 47 | } 48 | } 49 | 50 | @Transactional 51 | public synchronized static boolean searchDuplicateByUrl(String url) 52 | { 53 | //System.out.println("In db url is : "+url); 54 | 55 | //TypedQuery query = EMfactory.em.createQuery( 56 | // "SELECT result FROM Result result where result.url='"+url+"'" , Result.class); 57 | TypedQuery query = EMfactory.em.createQuery( 58 | "SELECT result FROM Result result where result.url=:url" , Result.class); 59 | query.setParameter("url", url); 60 | ArrayList results = (ArrayList) query.getResultList(); 61 | 62 | //System.out.println("query size :"+Integer.toString(results.size())); 63 | if(results.size()>0) 64 | { 65 | return true; 66 | } 67 | return false; 68 | } 69 | 70 | public static void main (String [] args) 71 | { 72 | // Result person = new Result(); 73 | // ArrayList test=new ArrayList<>(); 74 | // test.add("asd"); 75 | // person.setSearchedTerms(test); 76 | // person.setSearchedtext("some lines up and down man"); 77 | // person.setUrl("http://google.com4"); 78 | // DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); 79 | // Date date = new Date(); 80 | // person.setTime(dateFormat.format(date).toString()); 81 | System.out.println("Stated 
intializing*****************************************************************************"); 82 | for (int i=0;i<3;i++) 83 | { 84 | if(searchDuplicateByUrl("http://google.com71")) 85 | { 86 | System.out.println(Integer.toString(i) +" *************************-----found"); 87 | System.out.println(Integer.toString(i) +" *************************-----found"); 88 | System.out.println(Integer.toString(i) +" *************************-----found"); 89 | } 90 | 91 | else 92 | { 93 | System.out.println(Integer.toString(i) +" *************************-----not found"); 94 | System.out.println(Integer.toString(i) +" *************************-----not found"); 95 | System.out.println(Integer.toString(i) +" *************************-----not found"); 96 | 97 | } 98 | 99 | Result person = new Result(); 100 | Set test=new HashSet(); 101 | final SearchItem searchItem = new SearchItem(); 102 | searchItem.setSearchItem("asd"); 103 | test.add(searchItem); 104 | person.setSearchedTerms(test); 105 | person.setSearchedtext("some lines up and down man"); 106 | person.setUrl("http://google.com71"); 107 | DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); 108 | Date date = new Date(); 109 | person.setTime(dateFormat.format(date).toString()); 110 | DaoUtil.insert(person); 111 | Result person1 = new Result(); 112 | ArrayList test1=new ArrayList<>(); 113 | test1.add("asdq"); 114 | person1.setSearchedTerms(test); 115 | person1.setSearchedtext("some lines up and down man"); 116 | person1.setUrl("http://google.com712"); 117 | DaoUtil.insert(person1); 118 | 119 | 120 | } 121 | //EMfactory.em.persist(person); 122 | //EMfactory.em.getTransaction().commit(); 123 | 124 | 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /sqlite-dataaccess/src/main/java/org/sqlite/dataaccess/util/EMfactory.java: -------------------------------------------------------------------------------- 1 | package org.sqlite.dataaccess.util; 2 | 3 | import javax.persistence.Persistence; 4 | import javax.persistence.EntityManager; 5 | import javax.persistence.EntityManagerFactory; 6 | 7 | public class EMfactory { 8 | 9 | public static EntityManagerFactory emf; 10 | public static EntityManager em; 11 | 12 | public static void setUp() { 13 | emf = Persistence.createEntityManagerFactory("mysql-dataAccess"); 14 | } 15 | 16 | public static void initEntityManager() { 17 | em = emf.createEntityManager(); 18 | 19 | 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /sqlite-dataaccess/src/main/resources/META-INF/persistence.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | org.hibernate.ejb.HibernatePersistence 6 | org.sqlite.dataaccess.entity.Result 7 | org.sqlite.dataaccess.entity.SearchItem 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | org.hibernate.ejb.HibernatePersistence 20 | org.sqlite.dataaccess.entity.SearchItem 21 | org.sqlite.dataaccess.entity.Result 22 | 23 | 24 | 25 | 26 | 27 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /sqlite-dataaccess/src/main/resources/import.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NaveenRudra/RTTM/34bad8c42c708a74333db5b7d932fb6422bb8308/sqlite-dataaccess/src/main/resources/import.sql -------------------------------------------------------------------------------- 
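A note on the data-access layer just shown: DaoUtil's static initializer calls EMfactory.setUp() and EMfactory.initEntityManager(), so the first class that touches DaoUtil boots the mysql-dataAccess persistence unit and every caller shares one EntityManager. The sketch below shows how a scraper result would be written through that layer. It is not code from the repository: the URL, matched term and profile name are made-up values, and it assumes the mysql-dataAccess unit points at a reachable database (for example the rttm schema created by script/db_setup.sh).

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;

import org.sqlite.dataaccess.entity.Result;
import org.sqlite.dataaccess.entity.SearchItem;
import org.sqlite.dataaccess.util.DaoUtil;

// Hypothetical example class, not part of the repository.
public class PersistExample {
    public static void main(String[] args) {
        String url = "https://pastebin.com/raw/example";       // made-up paste URL
        // Same guard TruffleHog.getSecrets() uses before writing a new entry.
        if (!DaoUtil.searchDuplicateByUrl(url)) {
            SearchItem item = new SearchItem();
            item.setSearchItem("aws_secret_access_key");        // made-up matched term

            Set<SearchItem> items = new HashSet<>();
            items.add(item);

            Result result = new Result();
            result.setUrl(url);
            result.setSearchedtext("raw paste body goes here");
            result.setSearchedTerms(items);
            result.setIsValid(true);
            result.setBotName("PastieProfile");                 // made-up profile name
            result.setTime(new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").format(new Date()));

            DaoUtil.insert(result);                             // begins, persists and commits the transaction
        }
    }
}

Because insert() and merge() are synchronized around that single shared EntityManager, callers such as the scraper threads need no locking of their own; they only need the duplicate-URL check shown above.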
/sqlite-dataaccess/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 2 | log4j.appender.stdout.Target=System.out 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n 5 | log4j.rootLogger=all, stdout 6 | #log4j.logger.org.hibernate=all 7 | #log4j.logger.org.hibernate.type=all 8 | #log4j.logger.org.hibernate.tool.hbm2ddl=debug -------------------------------------------------------------------------------- /sqlite-dataaccess/src/test/java/org/sqlite/dataaccess/AppTest.java: -------------------------------------------------------------------------------- 1 | package org.sqlite.dataaccess; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | --------------------------------------------------------------------------------
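One last sketch ties this back to the TruffleHog runnable shown earlier in rts-impl. Its caller is not included in this dump, so the example below only illustrates how the visible API fits together: ConfigData.pythonPath and ConfigData.trufflehogPath are assumed to be writable static fields (ScrapperTool assigns ConfigData.configDirectory the same way), the paths mirror scrapper_config/global.properties, the git URL comes from the commented-out main() in TruffleHog.java, and the search term, profile and entropy values are placeholders. The class is assumed to live in the same package as TruffleHog.

import org.kafkaparser.utilities.ConfigData;

// Hypothetical example class, not part of the repository.
public class TruffleHogExample {
    public static void main(String[] args) throws Exception {
        // Normally loaded from scrapper_config/global.properties.
        ConfigData.pythonPath = "/usr/bin/python2.7";
        ConfigData.trufflehogPath = "/home/n0r00ij/truffleHog/truffleHog/truffleHog.py";

        TruffleHog scanner = new TruffleHog();
        // Note: the method really is spelled initilaize() in TruffleHog.java.
        scanner.initilaize(
                "https://github.com/cogdog/tweets.git", // link to scan (from the commented-out main)
                "aws_secret_access_key",                // placeholder search term
                "GithubProfile",                        // placeholder profile name used in the alert mail
                "false",                                // "false" disables trufflehog's --regex switch
                "true");                                // value passed through as --entropy=true

        // Run it the way the scraper profiles are run: on its own thread ...
        new Thread(scanner).start();

        // ... or call scanner.getSecrets() directly to block for the parsed findings.
    }
}

Because TruffleHog implements Runnable, handing it to a Thread as above matches how the rest of the tool schedules work, while a direct getSecrets() call is mainly useful for one-off debugging.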