├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── cli ├── README.md ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── crawljax │ │ │ └── cli │ │ │ ├── JarRunner.java │ │ │ ├── LogUtil.java │ │ │ └── ParameterInterpeter.java │ └── resources │ │ ├── jar-with-dependencies.xml │ │ ├── logback.xml │ │ └── project.version │ └── test │ └── java │ └── com │ └── crawljax │ ├── cli │ ├── JarRunnerTest.java │ └── LogUtilTest.java │ └── test │ └── util │ └── CaptureSystemStreams.java ├── core ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── crawljax │ │ │ ├── browser │ │ │ ├── EmbeddedBrowser.java │ │ │ ├── WebDriverBackedEmbeddedBrowser.java │ │ │ ├── WebDriverBrowserBuilder.java │ │ │ └── package-info.java │ │ │ ├── condition │ │ │ ├── Condition.java │ │ │ ├── ConditionType.java │ │ │ ├── ConditionTypeChecker.java │ │ │ ├── CountCondition.java │ │ │ ├── JavaScriptCondition.java │ │ │ ├── Logic.java │ │ │ ├── NotRegexCondition.java │ │ │ ├── NotUrlCondition.java │ │ │ ├── NotVisibleCondition.java │ │ │ ├── NotXPathCondition.java │ │ │ ├── RegexCondition.java │ │ │ ├── UrlCondition.java │ │ │ ├── VisibleCondition.java │ │ │ ├── XPathCondition.java │ │ │ ├── browserwaiter │ │ │ │ ├── ExpectedCondition.java │ │ │ │ ├── ExpectedElementCondition.java │ │ │ │ ├── ExpectedVisibleCondition.java │ │ │ │ ├── WaitCondition.java │ │ │ │ ├── WaitConditionChecker.java │ │ │ │ └── package-info.java │ │ │ ├── crawlcondition │ │ │ │ ├── CrawlCondition.java │ │ │ │ └── package-info.java │ │ │ ├── eventablecondition │ │ │ │ ├── EventableCondition.java │ │ │ │ ├── EventableConditionChecker.java │ │ │ │ └── package-info.java │ │ │ └── invariant │ │ │ │ ├── Invariant.java │ │ │ │ └── package-info.java │ │ │ ├── core │ │ │ ├── CandidateCrawlAction.java │ │ │ ├── CandidateElement.java │ │ │ ├── CandidateElementExtractor.java │ │ │ ├── CandidateElementManager.java │ │ │ ├── CrawlController.java │ │ │ ├── CrawlQueue.java 
│ │ │ ├── CrawlSession.java │ │ │ ├── CrawlSessionNotSetupYetException.java │ │ │ ├── CrawlTaskConsumer.java │ │ │ ├── Crawler.java │ │ │ ├── CrawlerContext.java │ │ │ ├── CrawlerLeftDomainException.java │ │ │ ├── CrawljaxException.java │ │ │ ├── CrawljaxRunner.java │ │ │ ├── ExitNotifier.java │ │ │ ├── ExtractorManager.java │ │ │ ├── StateUnreachableException.java │ │ │ ├── UnfiredCandidateActions.java │ │ │ ├── configuration │ │ │ │ ├── AcceptAllFramesChecker.java │ │ │ │ ├── BrowserConfiguration.java │ │ │ │ ├── ConfigurationHelper.java │ │ │ │ ├── CrawlActionsBuilder.java │ │ │ │ ├── CrawlElement.java │ │ │ │ ├── CrawlRules.java │ │ │ │ ├── CrawlScope.java │ │ │ │ ├── CrawljaxConfiguration.java │ │ │ │ ├── DefaultCrawlScope.java │ │ │ │ ├── DefaultUnexpectedAlertHandler.java │ │ │ │ ├── Form.java │ │ │ │ ├── FormAction.java │ │ │ │ ├── FormInputField.java │ │ │ │ ├── IgnoreFrameChecker.java │ │ │ │ ├── InputField.java │ │ │ │ ├── InputSpecification.java │ │ │ │ ├── PreCrawlConfiguration.java │ │ │ │ ├── ProxyConfiguration.java │ │ │ │ └── UnexpectedAlertHandler.java │ │ │ ├── exception │ │ │ │ ├── BrowserConnectionException.java │ │ │ │ ├── CrawlPathToException.java │ │ │ │ └── package-info.java │ │ │ ├── plugin │ │ │ │ ├── DomChangeNotifierPlugin.java │ │ │ │ ├── GeneratesOutput.java │ │ │ │ ├── HostInterface.java │ │ │ │ ├── HostInterfaceImpl.java │ │ │ │ ├── OnBrowserCreatedPlugin.java │ │ │ │ ├── OnFireEventFailedPlugin.java │ │ │ │ ├── OnInvariantViolationPlugin.java │ │ │ │ ├── OnNewStatePlugin.java │ │ │ │ ├── OnRevisitStatePlugin.java │ │ │ │ ├── OnUrlLoadPlugin.java │ │ │ │ ├── Plugin.java │ │ │ │ ├── Plugins.java │ │ │ │ ├── PostCrawlingPlugin.java │ │ │ │ ├── PreCrawlingPlugin.java │ │ │ │ ├── PreStateCrawlingPlugin.java │ │ │ │ └── descriptor │ │ │ │ │ ├── Parameter.java │ │ │ │ │ ├── PluginDescriptor.java │ │ │ │ │ └── jaxb │ │ │ │ │ └── generated │ │ │ │ │ ├── ObjectFactory.java │ │ │ │ │ ├── OptionList.java │ │ │ │ │ ├── Parameter.java │ │ │ │ │ 
├── ParameterList.java │ │ │ │ │ ├── PluginDescriptor.java │ │ │ │ │ └── VersionList.java │ │ │ └── state │ │ │ │ ├── CrawlPath.java │ │ │ │ ├── DefaultStateVertexFactory.java │ │ │ │ ├── Element.java │ │ │ │ ├── Eventable.java │ │ │ │ ├── Identification.java │ │ │ │ ├── InMemoryStateFlowGraph.java │ │ │ │ ├── StateFlowGraph.java │ │ │ │ ├── StateMachine.java │ │ │ │ ├── StateVertex.java │ │ │ │ ├── StateVertexFactory.java │ │ │ │ └── StateVertexImpl.java │ │ │ ├── di │ │ │ ├── ConfigurationModule.java │ │ │ ├── CoreModule.java │ │ │ └── CrawlSessionProvider.java │ │ │ ├── forms │ │ │ ├── FormHandler.java │ │ │ ├── FormInput.java │ │ │ ├── FormInputValueHelper.java │ │ │ ├── InputValue.java │ │ │ ├── RandomInputValueGenerator.java │ │ │ └── package-info.java │ │ │ ├── metrics │ │ │ └── MetricsModule.java │ │ │ ├── oraclecomparator │ │ │ ├── AbstractComparator.java │ │ │ ├── Comparator.java │ │ │ ├── OracleComparator.java │ │ │ ├── StateComparator.java │ │ │ ├── comparators │ │ │ │ ├── AttributeComparator.java │ │ │ │ ├── DateComparator.java │ │ │ │ ├── EditDistanceComparator.java │ │ │ │ ├── PlainStructureComparator.java │ │ │ │ ├── RegexComparator.java │ │ │ │ ├── ScriptComparator.java │ │ │ │ ├── SimpleComparator.java │ │ │ │ ├── StyleComparator.java │ │ │ │ ├── XPathExpressionComparator.java │ │ │ │ └── package-info.java │ │ │ └── package-info.java │ │ │ └── util │ │ │ ├── DOMComparer.java │ │ │ ├── DomDifferenceListener.java │ │ │ ├── DomHistoryElement.java │ │ │ ├── DomUtils.java │ │ │ ├── ElementResolver.java │ │ │ ├── HtmlNamespace.java │ │ │ ├── UrlUtils.java │ │ │ ├── XMLObject.java │ │ │ ├── XPathHelper.java │ │ │ └── package-info.java │ └── resources │ │ └── com │ │ └── crawljax │ │ └── core │ │ └── plugin │ │ └── descriptor │ │ └── jaxb │ │ └── plugin-descriptor.xsd │ └── test │ ├── java │ └── com │ │ └── crawljax │ │ ├── browser │ │ ├── BrowserClosesDownloadPopUp.java │ │ ├── BrowserProvider.java │ │ ├── ChromeProxyConfig.java │ │ ├── 
WebDriverBackedEmbeddedBrowserNoCrashTest.java │ │ ├── WebDriverBackedEmbeddedBrowserTest.java │ │ └── matchers │ │ │ └── StateFlowGraphMatchers.java │ │ ├── condition │ │ ├── BrowserDoesntLeaveUrlTest.java │ │ ├── ConditionTest.java │ │ └── browserwaiter │ │ │ └── WaitConditionTest.java │ │ ├── core │ │ ├── CandidateElementExtractorTest.java │ │ ├── CandidateElementManagerTest.java │ │ ├── CandidateElementTest.java │ │ ├── CrawlControllerTest.java │ │ ├── CrawlerStopTest.java │ │ ├── CrawlerTest.java │ │ ├── ExitNotifierTest.java │ │ ├── IFrameTest.java │ │ ├── NestedFramesTest.java │ │ ├── PassBasicHttpAuthTest.java │ │ ├── PopUpTest.java │ │ ├── configuration │ │ │ ├── CrawlActionsTest.java │ │ │ ├── CrawlElementMatcher.java │ │ │ ├── CrawljaxConfigurationBuilderTest.java │ │ │ ├── DefaultCrawlScopeTest.java │ │ │ ├── UnderXPathTest.java │ │ │ └── XPathEscapeApostropheTest.java │ │ ├── largetests │ │ │ ├── LargeChromeTest.java │ │ │ ├── LargeFirefoxTest.java │ │ │ ├── LargeIETest.java │ │ │ ├── LargePhantomJSTest.java │ │ │ └── LargeTestBase.java │ │ ├── plugin │ │ │ ├── OnFireEventFailedPluginTest.java │ │ │ ├── PluginsTest.java │ │ │ └── PluginsWithCrawlerTest.java │ │ └── state │ │ │ ├── ElementTest.java │ │ │ ├── EventableTest.java │ │ │ ├── PostCrawlStateGraphChecker.java │ │ │ ├── StateFlowGraphTest.java │ │ │ ├── StateMachineTest.java │ │ │ ├── StateVertexFactoryTest.java │ │ │ ├── StateVertexTest.java │ │ │ └── StatesContainElementsTest.java │ │ ├── crawls │ │ ├── CrawlConcurrently.java │ │ ├── CrawlHiddenElementsTest.java │ │ └── CrawlWithCustomScopeTest.java │ │ ├── forms │ │ ├── FormHandlerTest.java │ │ └── RandomInputValueGeneratorTest.java │ │ ├── oracle │ │ └── OracleTest.java │ │ ├── oraclecomparator │ │ └── comparators │ │ │ └── EditDistanceTest.java │ │ ├── test │ │ ├── BaseCrawler.java │ │ ├── BrowserTest.java │ │ ├── RunWithWebServer.java │ │ ├── Utils.java │ │ ├── WebServer.java │ │ ├── WebServerTest.java │ │ └── matchers │ │ │ └── 
FileMatcher.java │ │ └── util │ │ ├── DOMComparerTest.java │ │ ├── DomUtilsBrowserTest.java │ │ ├── DomUtilsTest.java │ │ ├── HtmlNamespaceTest.java │ │ ├── UrlUtilsTest.java │ │ ├── XMLObjectTest.java │ │ ├── XPathHelperTest.java │ │ └── XmlunitDifferenceTest.java │ └── resources │ ├── candidateElementExtractorTest │ ├── domWithFourTypeDownloadLink.html │ └── domWithOneExternalAndTwoInternal.html │ ├── configuration │ └── crawljax.properties │ ├── demo-site │ ├── conditions.html │ ├── crawlConditions.html │ ├── customInput.html │ ├── forms.html │ ├── home.html │ ├── img │ │ └── nav.gif │ ├── index.html │ ├── info.html │ ├── invariants.html │ ├── js │ │ └── general.js │ ├── lib │ │ └── jquery-2.0.3.min.js │ ├── oracleComparators.html │ ├── papers.html │ ├── plugins.html │ ├── randomInput.html │ ├── select.html │ ├── style.css │ └── waitConditions.html │ ├── logback-test.xml │ ├── realm.properties │ ├── site │ ├── concurrentcrawl1 │ │ ├── index.html │ │ ├── page_a.html │ │ ├── page_b.html │ │ └── page_c.html │ ├── concurrentcrawl2 │ │ ├── index.html │ │ ├── page_x.html │ │ ├── page_y.html │ │ └── page_z.html │ ├── crawlconditions │ │ └── index.html │ ├── crawler │ │ ├── index.html │ │ ├── payload_10.html │ │ ├── payload_11.html │ │ ├── payload_2.html │ │ ├── payload_3.html │ │ ├── payload_4.html │ │ ├── payload_5.html │ │ ├── payload_6.html │ │ ├── payload_7.html │ │ ├── payload_8.html │ │ └── payload_9.html │ ├── crawlscope │ │ ├── in_scope.html │ │ ├── in_scope_inner.html │ │ ├── index.html │ │ ├── out_of_scope.html │ │ └── out_of_scope_inner.html │ ├── download │ │ ├── download.blob │ │ └── download.html │ ├── formhandler │ │ └── index.html │ ├── hidden-elements-site │ │ ├── a.html │ │ ├── b.html │ │ └── index.html │ ├── home.html │ ├── iframe │ │ ├── iframe.html │ │ ├── iframe2.html │ │ ├── index.html │ │ ├── page0-0-0.html │ │ ├── page0-0.html │ │ ├── page0.html │ │ └── subiframe.html │ ├── index.html │ ├── infinite.html │ ├── js │ │ └── general.js │ ├── lib │ 
│ └── jquery-2.0.3.min.js │ ├── navigate_other_urls.html │ ├── popup │ │ └── index.html │ ├── simple.html │ ├── simplelink │ │ └── simplelink.html │ ├── testCrawlElementCondition.html │ ├── testCrawlElements.html │ ├── testCrawlconditions.html │ ├── testInvariants.html │ ├── testOracleComparators.html │ ├── testWaitCondition.html │ └── underxpath.html │ └── util │ ├── domtest.html │ └── state-10.html ├── eclipse-formatter.xml ├── examples ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── crawljax │ │ └── examples │ │ ├── AdvancedExample.java │ │ ├── CrawlScopeExample.java │ │ ├── InvariantExample.java │ │ ├── MetricPluginExample.java │ │ ├── PluginExample.java │ │ └── SimplestExample.java │ └── resources │ └── logback.xml ├── plugins ├── README.md ├── crawloverview-plugin │ ├── .gitignore │ ├── .travis.yml │ ├── README.md │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── crawljax │ │ │ │ └── plugins │ │ │ │ └── crawloverview │ │ │ │ ├── BeanToReadableMap.java │ │ │ │ ├── CrawlOverview.java │ │ │ │ ├── CrawlOverviewException.java │ │ │ │ ├── ImageWriter.java │ │ │ │ ├── OutPutModelCache.java │ │ │ │ ├── OutputBuilder.java │ │ │ │ ├── StateBuilder.java │ │ │ │ ├── StateWriter.java │ │ │ │ └── model │ │ │ │ ├── CandidateElementPosition.java │ │ │ │ ├── Edge.java │ │ │ │ ├── OutPutModel.java │ │ │ │ ├── Serializer.java │ │ │ │ ├── State.java │ │ │ │ ├── StateCounter.java │ │ │ │ ├── StateStatistics.java │ │ │ │ └── Statistics.java │ │ └── resources │ │ │ ├── config.html │ │ │ ├── header.html │ │ │ ├── index.html │ │ │ ├── nav.html │ │ │ ├── plugin-descriptor.xml │ │ │ ├── skeleton │ │ │ ├── css │ │ │ │ ├── bootstrap-responsive.min.css │ │ │ │ ├── bootstrap.min.css │ │ │ │ ├── shCore.css │ │ │ │ └── shCoreDefault.css │ │ │ ├── img │ │ │ │ ├── glyphicons-halflings-white.png │ │ │ │ └── glyphicons-halflings.png │ │ │ ├── js │ │ │ │ └── graphbuilder.js │ │ │ └── lib │ │ │ │ ├── bootstrap.min.js │ │ │ │ ├── jquery-2.0.3.min.js │ │ │ │ ├── 
shBrushXml.js │ │ │ │ ├── shCore.js │ │ │ │ └── vivagraph.min.js │ │ │ ├── state.html │ │ │ ├── statistics.html │ │ │ ├── urls.html │ │ │ └── version.html │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── crawljax │ │ │ ├── matchers │ │ │ └── IsValidJson.java │ │ │ └── plugins │ │ │ └── crawloverview │ │ │ ├── BeanToReadableMapTest.java │ │ │ ├── CandidateElementMatcher.java │ │ │ ├── HoverTest.java │ │ │ ├── ImageWriterTest.java │ │ │ ├── OutputBuilderTest.java │ │ │ ├── OverviewIntegrationTest.java │ │ │ ├── RunHoverCrawl.java │ │ │ ├── SimpleSiteCrawlTest.java │ │ │ ├── TestBean.java │ │ │ └── model │ │ │ └── SerializeTest.java │ │ └── resources │ │ ├── hover-test-site │ │ ├── a.html │ │ ├── b.html │ │ ├── c.html │ │ └── index.html │ │ ├── logback-test.xml │ │ ├── sampleOutModel.json │ │ └── screenshot.png ├── pom.xml └── test-plugin │ ├── pom.xml │ └── src │ └── main │ ├── java │ └── com │ │ └── crawljax │ │ └── plugins │ │ └── testplugin │ │ ├── Runner.java │ │ └── TestPlugin.java │ └── resources │ └── plugin-descriptor.xml ├── pom.xml └── test-utils ├── README.md ├── pom.xml └── src ├── main ├── java │ └── com │ │ └── crawljax │ │ ├── crawltests │ │ ├── SimpleInputSiteCrawl.java │ │ ├── SimpleJsSiteCrawl.java │ │ ├── SimpleSiteCrawl.java │ │ └── SimpleXpathCrawl.java │ │ └── rules │ │ └── TempDirInTargetFolder.java └── resources │ └── sites │ ├── lib │ └── jquery-2.0.3.min.js │ ├── simple-input-site │ ├── index.html │ └── otherState.html │ ├── simple-js-site │ ├── index.html │ ├── payload_10.html │ ├── payload_11.html │ ├── payload_2.html │ ├── payload_3.html │ ├── payload_4.html │ ├── payload_5.html │ ├── payload_6.html │ ├── payload_7.html │ ├── payload_8.html │ └── payload_9.html │ ├── simple-site │ ├── a.html │ ├── b.html │ ├── c.html │ └── index.html │ └── simple-xpath-site │ ├── a.html │ ├── b.html │ ├── index-iframe.html │ └── index.html └── test └── java └── com └── crawljax └── crawljax_plugins_plugin └── SampleCrawlersTest.java 
/.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: Java CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | java: [8, 11] 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | - uses: actions/setup-java@v1 19 | with: 20 | java-version: ${{ matrix.java }} 21 | - uses: actions/cache@v2 22 | with: 23 | path: | 24 | ~/.m2/repository 25 | key: ${{ runner.os }}-gradle-${{ matrix.java }}-${{ hashFiles('**/pom.xml') }} 26 | restore-keys: | 27 | ${{ runner.os }}-gradle-${{ matrix.java }}- 28 | ${{ runner.os }}-gradle- 29 | - run: | 30 | mkdir geckodriver 31 | GECKODRIVER_VER="0.29.0"; wget -qO - https://github.com/mozilla/geckodriver/releases/download/v$GECKODRIVER_VER/geckodriver-v$GECKODRIVER_VER-linux64.tar.gz | tar xz -C geckodriver 32 | echo "$PWD/geckodriver" >> "$GITHUB_PATH" # every run step starts a fresh shell, so a plain export would be lost before the mvn test step 33 | - run: | 34 | cd core 35 | mvn install -DskipTests=true -Dmaven.javadoc.skip=true -Dgpg.skip=true -B -V 36 | mvn test -B -Pintegrationtests -Dtest.browser=FIREFOX 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Eclipse 2 | .classpath 3 | .project 4 | .settings/ 5 | 6 | # Intellij 7 | .idea/ 8 | *.iml 9 | *.iws 10 | *.ipr 11 | 12 | # Maven 13 | *.log 14 | target/ 15 | release.properties 16 | pom.xml.releaseBackup 17 | git.properties 18 | output/ 19 | 20 | # Mac 21 | .DS_Store -------------------------------------------------------------------------------- /cli/README.md: -------------------------------------------------------------------------------- 1 | # Crawljax Command-line 2 | This is the Command-line distribution of Crawljax. The project is assembled in a ZIP file containing the jar that you can run to execute the crawler. 
3 | 4 | 5 | Unzip the zip and in the resulting folder you can run Crawljax as follows: 6 | 7 | ``` 8 | usage: java -jar crawljax-cli-version.jar theUrl theOutputDir 9 | -a,--crawlHiddenAnchors Crawl anchors even if they are not visible in the 10 | browser. 11 | -b,--browser browser type: firefox, ie, chrome, remote, 12 | htmlunit, android, iphone. Default is Firefox 13 | -click a comma separated list of HTML tags that should 14 | be clicked. Default is A and BUTTON 15 | -d,--depth crawl depth level. Default is 2 16 | -h,--help print this message 17 | -log Log to this file instead of the console 18 | -o,--override Override the output directory if non-empty 19 | -p,--parallel Number of browsers to use for crawling. Default 20 | is 1 21 | -s,--maxstates max number of states to crawl. Default is 0 22 | (unlimited) 23 | -t,--timeout Specify the maximum crawl time in minutes 24 | -v,--verbose Be extra verbose 25 | -version print the version information and exit 26 | -waitAfterEvent the time to wait after an event has been fired in 27 | milliseconds. Default is 500 28 | -waitAfterReload the time to wait after an URL has been loaded in 29 | milliseconds. Default is 500 30 | ``` 31 | 32 | The output folder will contain the output of the Crawl overview plugin. -------------------------------------------------------------------------------- /cli/src/main/java/com/crawljax/cli/LogUtil.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.cli; 2 | 3 | import org.slf4j.LoggerFactory; 4 | 5 | import ch.qos.logback.classic.Level; 6 | import ch.qos.logback.classic.Logger; 7 | import ch.qos.logback.classic.spi.ILoggingEvent; 8 | import ch.qos.logback.core.ConsoleAppender; 9 | import ch.qos.logback.core.FileAppender; 10 | import ch.qos.logback.core.encoder.Encoder; 11 | 12 | public class LogUtil { 13 | 14 | /** 15 | * Configure file logging and stop console logging. 16 | * 17 | * @param filename 18 | * Log to this file. 
19 | */ 20 | @SuppressWarnings("unchecked") 21 | static void logToFile(String filename) { 22 | Logger rootLogger = (Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME); 23 | 24 | FileAppender fileappender = new FileAppender<>(); 25 | fileappender.setContext(rootLogger.getLoggerContext()); 26 | fileappender.setFile(filename); 27 | fileappender.setName("FILE"); 28 | 29 | ConsoleAppender console = (ConsoleAppender) rootLogger.getAppender("STDOUT"); 30 | fileappender.setEncoder((Encoder) console.getEncoder()); 31 | 32 | fileappender.start(); 33 | 34 | rootLogger.addAppender(fileappender); 35 | 36 | console.stop(); 37 | } 38 | 39 | /** 40 | * @param newLevel 41 | * for com.crawljax.* 42 | */ 43 | static void setCrawljaxLogLevel(Level newLevel) { 44 | Logger rootLogger = (Logger) LoggerFactory.getLogger("com.crawljax"); 45 | rootLogger.setLevel(newLevel); 46 | } 47 | 48 | private LogUtil() { 49 | 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /cli/src/main/resources/jar-with-dependencies.xml: -------------------------------------------------------------------------------- 1 | 5 | distribution 6 | 7 | zip 8 | 9 | 10 | 11 | ${project.basedir}/../ 12 | / 13 | 14 | LICENSE 15 | 16 | 17 | 18 | ${project.basedir}/../ 19 | /crawljax-doc 20 | 21 | CHANGELOG* 22 | README* 23 | 24 | 25 | 26 | ${project.basedir} 27 | / 28 | 29 | README* 30 | 31 | 32 | 33 | src/main/resources 34 | 35 | 36 | logback.xml 37 | 38 | 39 | 40 | target 41 | 42 | 43 | *.jar 44 | 45 | 46 | 47 | 48 | 49 | /lib 50 | false 51 | runtime 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /cli/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | %d{HH:mm:ss.SSS} [%thread] %-5level - %msg%n 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 
-------------------------------------------------------------------------------- /cli/src/main/resources/project.version: -------------------------------------------------------------------------------- 1 | Version = ${project.version} 2 | Git commit id = ${git.commit.id} 3 | Git describe = ${git.commit.id.describe} 4 | Build at ${git.build.time} -------------------------------------------------------------------------------- /cli/src/test/java/com/crawljax/test/util/CaptureSystemStreams.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.test.util; 2 | 3 | import java.io.ByteArrayOutputStream; 4 | import java.io.PrintStream; 5 | 6 | import org.junit.rules.ExternalResource; 7 | 8 | /** 9 | * Changes the {@link System#out} and {@link System#err} to a captured stream so we can inspect it 10 | * in a test. In the {@link #after()} method the streams are switched back to the default stream. 11 | */ 12 | public class CaptureSystemStreams extends ExternalResource { 13 | 14 | private ByteArrayOutputStream captureErrorStream = new ByteArrayOutputStream(); 15 | private ByteArrayOutputStream captureOutStream = new ByteArrayOutputStream(); 16 | private PrintStream originalErrorStream; 17 | private PrintStream originalOutStream; 18 | 19 | public CaptureSystemStreams() { 20 | } 21 | 22 | @Override 23 | protected void before() throws Throwable { 24 | originalErrorStream = System.err; 25 | originalOutStream = System.out; 26 | System.setErr(new PrintStream(captureErrorStream)); 27 | System.setOut(new PrintStream(captureOutStream)); 28 | } 29 | 30 | @Override 31 | protected void after() { 32 | PrintStream tempErrStream = System.err; 33 | System.setErr(originalErrorStream); 34 | tempErrStream.close(); 35 | 36 | PrintStream tempOutStream = System.out; 37 | System.setOut(originalOutStream); 38 | tempOutStream.close(); 39 | } 40 | 41 | public String getConsoleOutput() { 42 | return captureOutStream.toString(); 43 | } 44 | 45 | 
public String getErrorOutput() { 46 | return captureErrorStream.toString(); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/browser/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This is the core package for the browsers. 3 | */ 4 | package com.crawljax.browser; -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/Condition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition; 2 | 3 | import net.jcip.annotations.ThreadSafe; 4 | 5 | import com.crawljax.browser.EmbeddedBrowser; 6 | 7 | /** 8 | * A condition is a condition which can be tested on the current state in the browser. 9 | * 10 | * @author dannyroest@gmail.com (Danny Roest) 11 | */ 12 | @ThreadSafe 13 | public interface Condition { 14 | 15 | /** 16 | * @param browser 17 | * The browser. 18 | * @return whether the evaluated condition is satisfied 19 | */ 20 | boolean check(EmbeddedBrowser browser); 21 | 22 | } 23 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/NotRegexCondition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition; 2 | 3 | import net.jcip.annotations.Immutable; 4 | 5 | import java.util.Objects; 6 | 7 | import com.crawljax.browser.EmbeddedBrowser; 8 | import com.google.common.base.MoreObjects; 9 | 10 | /** 11 | * A condition which returns true iff the expression does NOT occur in the DOM. 12 | * 13 | * @author dannyroest@gmail.com (Danny Roest) 14 | */ 15 | @Immutable 16 | public class NotRegexCondition implements Condition { 17 | 18 | private final RegexCondition regexCondition; 19 | 20 | /** 21 | * @param expression 22 | * the regular expression. 
23 | */ 24 | public NotRegexCondition(String expression) { 25 | this.regexCondition = new RegexCondition(expression); 26 | } 27 | 28 | @Override 29 | public boolean check(EmbeddedBrowser browser) { 30 | return Logic.not(regexCondition).check(browser); 31 | } 32 | 33 | @Override 34 | public int hashCode() { 35 | return Objects.hash(getClass(), regexCondition); 36 | } 37 | 38 | @Override 39 | public boolean equals(Object object) { 40 | if (object instanceof NotRegexCondition) { 41 | NotRegexCondition that = (NotRegexCondition) object; 42 | return Objects.equals(this.regexCondition, that.regexCondition); 43 | } 44 | return false; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return MoreObjects.toStringHelper(this) 50 | .add("regexCondition", regexCondition) 51 | .toString(); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/NotUrlCondition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition; 2 | 3 | import net.jcip.annotations.Immutable; 4 | 5 | import java.util.Objects; 6 | 7 | import com.crawljax.browser.EmbeddedBrowser; 8 | import com.google.common.base.MoreObjects; 9 | 10 | /** 11 | * Conditions that returns true iff the browser's current url NOT contains url. Note: Case 12 | * insensitive. 13 | * 14 | * @author dannyroest@gmail.com (Danny Roest) 15 | */ 16 | @Immutable 17 | public class NotUrlCondition implements Condition { 18 | 19 | private final UrlCondition urlCondition; 20 | 21 | /** 22 | * @param url 23 | * the URL. 
24 | */ 25 | public NotUrlCondition(String url) { 26 | this.urlCondition = new UrlCondition(url); 27 | } 28 | 29 | @Override 30 | public boolean check(EmbeddedBrowser browser) { 31 | return Logic.not(urlCondition).check(browser); 32 | } 33 | 34 | @Override 35 | public int hashCode() { 36 | return Objects.hash(getClass(), urlCondition); 37 | } 38 | 39 | @Override 40 | public boolean equals(Object object) { 41 | if (object instanceof NotUrlCondition) { 42 | NotUrlCondition that = (NotUrlCondition) object; 43 | return Objects.equals(this.urlCondition, that.urlCondition); 44 | } 45 | return false; 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return MoreObjects.toStringHelper(this) 51 | .add("urlCondition", urlCondition) 52 | .toString(); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/NotVisibleCondition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition; 2 | 3 | import net.jcip.annotations.Immutable; 4 | 5 | import java.util.Objects; 6 | 7 | import com.crawljax.browser.EmbeddedBrowser; 8 | import com.crawljax.core.state.Identification; 9 | import com.google.common.base.MoreObjects; 10 | 11 | /** 12 | * Condition that returns true iff the element found by the given identification is NOT visible. 13 | * 14 | * @author dannyroest@gmail.com (Danny Roest) 15 | */ 16 | @Immutable 17 | public class NotVisibleCondition implements Condition { 18 | 19 | private final VisibleCondition visibleCondition; 20 | 21 | /** 22 | * @param identification 23 | * the identification. 
24 | */ 25 | public NotVisibleCondition(Identification identification) { 26 | this.visibleCondition = new VisibleCondition(identification); 27 | } 28 | 29 | @Override 30 | public boolean check(EmbeddedBrowser browser) { 31 | return Logic.not(visibleCondition).check(browser); 32 | } 33 | 34 | @Override 35 | public int hashCode() { 36 | return Objects.hash(getClass(), visibleCondition); 37 | } 38 | 39 | @Override 40 | public boolean equals(Object object) { 41 | if (object instanceof NotVisibleCondition) { 42 | NotVisibleCondition that = (NotVisibleCondition) object; 43 | return Objects.equals(this.visibleCondition, that.visibleCondition); 44 | } 45 | return false; 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return MoreObjects.toStringHelper(this) 51 | .add("visibleCondition", visibleCondition) 52 | .toString(); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/NotXPathCondition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition; 2 | 3 | import net.jcip.annotations.Immutable; 4 | 5 | import java.util.Objects; 6 | 7 | import com.crawljax.browser.EmbeddedBrowser; 8 | import com.google.common.base.MoreObjects; 9 | 10 | /** 11 | * Condition that returns true iff no elements are found with expression. 12 | * 13 | * @author dannyroest@gmail.com (Danny Roest) 14 | */ 15 | @Immutable 16 | public class NotXPathCondition implements Condition { 17 | 18 | private final XPathCondition xpathCondition; 19 | 20 | /** 21 | * @param expression 22 | * the XPath expression. 
23 | */ 24 | public NotXPathCondition(String expression) { 25 | this.xpathCondition = new XPathCondition(expression); 26 | } 27 | 28 | @Override 29 | public boolean check(EmbeddedBrowser browser) { 30 | return Logic.not(xpathCondition).check(browser); 31 | } 32 | 33 | @Override 34 | public int hashCode() { 35 | return Objects.hash(getClass(), xpathCondition); 36 | } 37 | 38 | @Override 39 | public boolean equals(Object object) { 40 | if (object instanceof NotXPathCondition) { 41 | NotXPathCondition that = (NotXPathCondition) object; 42 | return Objects.equals(this.xpathCondition, that.xpathCondition); 43 | } 44 | return false; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return MoreObjects.toStringHelper(this) 50 | .add("xpathCondition", xpathCondition) 51 | .toString(); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/UrlCondition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition; 2 | 3 | import net.jcip.annotations.Immutable; 4 | 5 | import java.util.Objects; 6 | 7 | import com.crawljax.browser.EmbeddedBrowser; 8 | import com.google.common.base.MoreObjects; 9 | 10 | /** 11 | * Conditions that returns true iff the browser's current url contains url. Note: Case insensitive 12 | * 13 | * @author dannyroest@gmail.com (Danny Roest) 14 | */ 15 | @Immutable 16 | public class UrlCondition implements Condition { 17 | 18 | private final String url; 19 | 20 | /** 21 | * @param url 22 | * the URL. 
23 | */ 24 | public UrlCondition(String url) { 25 | this.url = url; 26 | } 27 | 28 | @Override 29 | public boolean check(EmbeddedBrowser browser) { 30 | return browser.getCurrentUrl().toLowerCase().contains(url); 31 | } 32 | 33 | @Override 34 | public int hashCode() { 35 | return Objects.hash(getClass(), url); 36 | } 37 | 38 | @Override 39 | public boolean equals(Object object) { 40 | if (object instanceof UrlCondition) { 41 | UrlCondition that = (UrlCondition) object; 42 | return Objects.equals(this.url, that.url); 43 | } 44 | return false; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return MoreObjects.toStringHelper(this) 50 | .add("url", url) 51 | .toString(); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/VisibleCondition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition; 2 | 3 | import net.jcip.annotations.Immutable; 4 | 5 | import java.util.Objects; 6 | 7 | import com.crawljax.browser.EmbeddedBrowser; 8 | import com.crawljax.core.state.Identification; 9 | import com.google.common.base.MoreObjects; 10 | 11 | /** 12 | * Conditions that returns true iff element found by given identification is visible. 13 | */ 14 | @Immutable 15 | public class VisibleCondition implements Condition { 16 | 17 | private final Identification identification; 18 | 19 | /** 20 | * @param identification 21 | * the identification. 
22 | */ 23 | public VisibleCondition(Identification identification) { 24 | this.identification = identification; 25 | } 26 | 27 | @Override 28 | public boolean check(EmbeddedBrowser browser) { 29 | return browser.isVisible(identification); 30 | } 31 | 32 | @Override 33 | public String toString() { 34 | return MoreObjects.toStringHelper(this) 35 | .add("identification", identification) 36 | .toString(); 37 | } 38 | 39 | @Override 40 | public int hashCode() { 41 | return Objects.hash(getClass(), identification); 42 | } 43 | 44 | @Override 45 | public boolean equals(Object object) { 46 | if (object instanceof VisibleCondition) { 47 | VisibleCondition that = (VisibleCondition) object; 48 | return Objects.equals(this.identification, that.identification); 49 | } 50 | return false; 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/browserwaiter/ExpectedCondition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition.browserwaiter; 2 | 3 | import net.jcip.annotations.ThreadSafe; 4 | 5 | import com.crawljax.browser.EmbeddedBrowser; 6 | 7 | /** 8 | * Interface for defining conditions to wait for. 9 | * 10 | * @author dannyroest@gmail.com (Danny Roest) 11 | */ 12 | @ThreadSafe 13 | public interface ExpectedCondition { 14 | 15 | /** 16 | * Is the expected condition satisfied. 17 | * 18 | * @param browser 19 | * the browser to execute the check on 20 | * @return Whether the condition is satisfied. 
21 | */ 22 | boolean isSatisfied(EmbeddedBrowser browser); 23 | 24 | } 25 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/browserwaiter/ExpectedElementCondition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition.browserwaiter; 2 | 3 | import net.jcip.annotations.ThreadSafe; 4 | 5 | import com.crawljax.browser.EmbeddedBrowser; 6 | import com.crawljax.core.state.Identification; 7 | 8 | /** 9 | * Checks whether an element exists. 10 | * 11 | * @author dannyroest@gmail.com (Danny Roest) 12 | */ 13 | @ThreadSafe 14 | public class ExpectedElementCondition implements ExpectedCondition { 15 | 16 | private final Identification identification; 17 | 18 | /** 19 | * Constructor. 20 | * 21 | * @param identification 22 | * the identification to use. 23 | */ 24 | public ExpectedElementCondition(Identification identification) { 25 | this.identification = identification; 26 | } 27 | 28 | @Override 29 | public boolean isSatisfied(EmbeddedBrowser browser) { 30 | return browser.elementExists(identification); 31 | } 32 | 33 | @Override 34 | public String toString() { 35 | return this.getClass().getSimpleName() + ": " + this.identification; 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/browserwaiter/ExpectedVisibleCondition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition.browserwaiter; 2 | 3 | import net.jcip.annotations.ThreadSafe; 4 | 5 | import com.crawljax.browser.EmbeddedBrowser; 6 | import com.crawljax.core.state.Identification; 7 | 8 | /** 9 | * Checks whether an element is visible. 
10 | * 11 | * @author dannyroest@gmail.com (Danny Roest) 12 | */ 13 | @ThreadSafe 14 | public class ExpectedVisibleCondition implements ExpectedCondition { 15 | 16 | private final Identification identification; 17 | 18 | /** 19 | * Constructor. 20 | * 21 | * @param identification 22 | * identification to use. 23 | */ 24 | public ExpectedVisibleCondition(Identification identification) { 25 | this.identification = identification; 26 | } 27 | 28 | @Override 29 | public boolean isSatisfied(EmbeddedBrowser browser) { 30 | return browser.isVisible(identification); 31 | } 32 | 33 | @Override 34 | public String toString() { 35 | return this.getClass().getSimpleName() + ": " + this.identification; 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/browserwaiter/WaitConditionChecker.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition.browserwaiter; 2 | 3 | import java.util.List; 4 | 5 | import javax.inject.Inject; 6 | 7 | import net.jcip.annotations.ThreadSafe; 8 | 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import com.crawljax.browser.EmbeddedBrowser; 13 | import com.crawljax.core.configuration.CrawlRules; 14 | import com.google.common.collect.ImmutableList; 15 | 16 | /** 17 | * Checks the wait conditions. 18 | */ 19 | @ThreadSafe 20 | public class WaitConditionChecker { 21 | 22 | private static final Logger LOGGER = LoggerFactory.getLogger(WaitConditionChecker.class 23 | .getName()); 24 | 25 | private ImmutableList waitConditions; 26 | 27 | @Inject 28 | public WaitConditionChecker(CrawlRules rules) { 29 | waitConditions = rules.getPreCrawlConfig().getWaitConditions(); 30 | } 31 | 32 | /** 33 | * @return the waitConditions 34 | */ 35 | public List getWaitConditions() { 36 | return waitConditions; 37 | } 38 | 39 | /** 40 | * @param browser 41 | * The browser to use. 
42 | */ 43 | public void wait(EmbeddedBrowser browser) { 44 | if (waitConditions == null) { 45 | return; 46 | } 47 | for (WaitCondition waitCondition : waitConditions) { 48 | LOGGER.info("Checking WaitCondition for url: {}", waitCondition.getUrl()); 49 | waitCondition.testAndWait(browser); 50 | } 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/browserwaiter/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Waiter utils. 3 | */ 4 | package com.crawljax.condition.browserwaiter; -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/crawlcondition/CrawlCondition.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition.crawlcondition; 2 | 3 | import java.util.List; 4 | 5 | import com.crawljax.condition.Condition; 6 | import com.crawljax.condition.ConditionType; 7 | 8 | /** 9 | * A Crawl condition is a condition which should be satisfied in order to crawl the current state. 10 | * 11 | * @author Danny 12 | */ 13 | public class CrawlCondition extends ConditionType { 14 | 15 | /** 16 | * @param description 17 | * Description of the condition. 18 | * @param crawlConditionCondition 19 | * Actual condition. 20 | */ 21 | public CrawlCondition(String description, Condition crawlConditionCondition) { 22 | super(description, crawlConditionCondition); 23 | } 24 | 25 | /** 26 | * @param description 27 | * Description of the condition. 28 | * @param crawlConditionCondition 29 | * Actual condition. 30 | * @param preConditions 31 | * Preconditions to check first. 32 | */ 33 | public CrawlCondition(String description, Condition crawlConditionCondition, 34 | Condition... 
preConditions) { 35 | super(description, crawlConditionCondition, preConditions); 36 | } 37 | 38 | /** 39 | * @param description 40 | * Description of the condition. 41 | * @param crawlConditionCondition 42 | * Actual condition. 43 | * @param preConditions 44 | * Preconditions to check first. 45 | */ 46 | public CrawlCondition(String description, Condition crawlConditionCondition, 47 | List preConditions) { 48 | super(description, crawlConditionCondition, preConditions); 49 | } 50 | 51 | /** 52 | * @return Returns the actual crawl condition. 53 | */ 54 | public Condition getCrawlCondition() { 55 | return getCondition(); 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/crawlcondition/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Crawlcondition package. 3 | */ 4 | package com.crawljax.condition.crawlcondition; -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/eventablecondition/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Eventable condition utils. 3 | */ 4 | package com.crawljax.condition.eventablecondition; -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/condition/invariant/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Invariant util package. 
3 | */ 4 | package com.crawljax.condition.invariant; -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/CandidateCrawlAction.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core; 2 | 3 | import com.crawljax.core.state.Eventable.EventType; 4 | import com.google.common.base.MoreObjects; 5 | 6 | /** 7 | * This class corresponds to the combination of a CandidateElement and a single 8 | * eventType. 9 | * 10 | * @author Stefan Lenselink <S.R.Lenselink@student.tudelft.nl> 11 | */ 12 | public class CandidateCrawlAction { 13 | private final CandidateElement candidateElement; 14 | private final EventType eventType; 15 | 16 | /** 17 | * The Constructor for the CandidateCrawlAction, build a new instance with 18 | * the CandidateElement and the EventType. 19 | * 20 | * @param candidateElement 21 | * the element to execute the eventType on 22 | * @param eventType 23 | * the eventType to execute on the Candidate Element. 
24 | */ 25 | public CandidateCrawlAction(CandidateElement candidateElement, 26 | EventType eventType) { 27 | this.candidateElement = candidateElement; 28 | this.eventType = eventType; 29 | } 30 | 31 | /** 32 | * @return the candidateElement 33 | */ 34 | public CandidateElement getCandidateElement() { 35 | return candidateElement; 36 | } 37 | 38 | /** 39 | * @return the eventType 40 | */ 41 | public EventType getEventType() { 42 | return eventType; 43 | } 44 | 45 | @Override 46 | public String toString() { 47 | return MoreObjects.toStringHelper(this) 48 | .add("candidateElement", candidateElement) 49 | .add("eventType", eventType).toString(); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/CrawlSessionNotSetupYetException.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core; 2 | 3 | import com.crawljax.core.state.StateVertex; 4 | import com.crawljax.di.CrawlSessionProvider; 5 | 6 | /** 7 | * {@link CrawljaxException} that is thrown when you call 8 | * {@link CrawlSessionProvider#get()} before the initial (index) 9 | * {@link StateVertex} is crawled. Only after the index is crawled will the 10 | * {@link CrawlSession} be available. 11 | */ 12 | @SuppressWarnings("serial") 13 | public class CrawlSessionNotSetupYetException extends CrawljaxException { 14 | 15 | public CrawlSessionNotSetupYetException() { 16 | super( 17 | "The crawlsession is not yet availeble. Wait until the index state is crawled."); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/CrawlerLeftDomainException.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core; 2 | 3 | /** 4 | * Is thrown when the browser leaves the domain/scope while crawling. 
5 | */ 6 | @SuppressWarnings("serial") 7 | public class CrawlerLeftDomainException extends CrawljaxException { 8 | 9 | public CrawlerLeftDomainException(String currentUrl) { 10 | super("Somehow we left the domain/scope to " + currentUrl); 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/CrawljaxException.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core; 2 | 3 | /** 4 | * General exception for Crawljax. 5 | */ 6 | public class CrawljaxException extends RuntimeException { 7 | 8 | private static final long serialVersionUID = 8597985648361590779L; 9 | 10 | /** 11 | * Constructs a CrawljaxException with null as its detail message. 12 | */ 13 | public CrawljaxException() { 14 | super(); 15 | } 16 | 17 | /** 18 | * Constructs a new CrawljaxException with the specified detail message. 19 | * 20 | * @param message 21 | * the detail message. 22 | */ 23 | public CrawljaxException(final String message) { 24 | super(message); 25 | } 26 | 27 | /** 28 | * Constructs a new CrawljaxException with the specified detail message and cause. 29 | * 30 | * @param message 31 | * the detail message. 32 | * @param cause 33 | * the cause (A null value is permitted, and indicates that the cause is nonexistent 34 | * or unknown). 35 | */ 36 | public CrawljaxException(final String message, final Throwable cause) { 37 | super(message, cause); 38 | } 39 | 40 | /** 41 | * Constructs a new CrawljaxException with the specified cause and a detail message 42 | * of (cause==null ? null : 43 | * cause.toString()) 44 | * 45 | * @param cause 46 | * the cause (A null value is permitted, and indicates that the cause is nonexistent 47 | * or unknown). 
48 | */ 49 | public CrawljaxException(final Throwable cause) { 50 | super(cause); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/CrawljaxRunner.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core; 2 | 3 | import static com.google.common.base.Preconditions.checkNotNull; 4 | 5 | import java.util.concurrent.Callable; 6 | 7 | import com.crawljax.core.ExitNotifier.ExitStatus; 8 | import com.crawljax.core.configuration.CrawljaxConfiguration; 9 | import com.crawljax.core.plugin.PostCrawlingPlugin; 10 | import com.crawljax.di.CoreModule; 11 | import com.google.inject.Guice; 12 | import com.google.inject.Injector; 13 | 14 | /** 15 | * Runs crawljax given a certain {@link CrawljaxConfiguration}. Run {@link #call()} to start a 16 | * crawl. 17 | */ 18 | public class CrawljaxRunner implements Callable { 19 | 20 | private final CrawljaxConfiguration config; 21 | private CrawlController controller; 22 | private ExitStatus reason; 23 | 24 | public CrawljaxRunner(CrawljaxConfiguration config) { 25 | this.config = config; 26 | } 27 | 28 | /** 29 | * Runs Crawljax with the given configuration. 30 | * 31 | * @return The {@link CrawlSession} once the Crawl is done. 32 | */ 33 | @Override 34 | public CrawlSession call() { 35 | Injector injector = Guice.createInjector(new CoreModule(config)); 36 | controller = injector.getInstance(CrawlController.class); 37 | CrawlSession session = controller.call(); 38 | reason = controller.getReason(); 39 | return session; 40 | } 41 | 42 | /** 43 | * Stops Crawljax. It will try to shutdown gracefully and run the {@link PostCrawlingPlugin}s. 44 | */ 45 | public void stop() { 46 | checkNotNull(controller, "Cannot stop Crawljax if you haven't started it"); 47 | controller.stop(); 48 | } 49 | 50 | /** 51 | * @return The {@link ExitStatus} Crawljax stopped or null if it hasn't stopped 52 | * yet. 
53 | */ 54 | public ExitStatus getReason() { 55 | return reason; 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/StateUnreachableException.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core; 2 | 3 | import com.crawljax.core.state.StateVertex; 4 | 5 | /** 6 | * Gets thrown when Crawljax cannot get to a target {@link StateVertex}. 7 | */ 8 | @SuppressWarnings("serial") 9 | public class StateUnreachableException extends CrawljaxException { 10 | 11 | private StateVertex target; 12 | 13 | public StateUnreachableException(StateVertex state, String reason) { 14 | super("Cannot reach state " + state.getName() + " because " + reason); 15 | this.target = state; 16 | } 17 | 18 | public StateVertex getTarget() { 19 | return target; 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/configuration/AcceptAllFramesChecker.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | /** 4 | * This class accepts all frames. 5 | * 6 | * @author Stefan Lenselink <slenselink@google.com> 7 | */ 8 | public class AcceptAllFramesChecker implements IgnoreFrameChecker { 9 | @Override 10 | public boolean isFrameIgnored(String frameId) { 11 | return false; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/configuration/ConfigurationHelper.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | import java.util.List; 4 | 5 | /** 6 | * Helper class for configurations. 
7 | * 8 | * @author Danny Roest (dannyroest@gmail.com) 9 | */ 10 | public final class ConfigurationHelper { 11 | 12 | private ConfigurationHelper() { 13 | } 14 | 15 | /** 16 | * @param items 17 | * The items to be added to the string. 18 | * @return string representation of list. format: a, b, , c. Empty String allowed 19 | */ 20 | public static String listToStringEmptyStringAllowed(List items) { 21 | StringBuilder str = new StringBuilder(); 22 | int i = 0; 23 | for (String item : items) { 24 | if (i > 0) { 25 | str.append(", "); 26 | } 27 | str.append(item); 28 | i++; 29 | } 30 | return str.toString(); 31 | } 32 | 33 | /** 34 | * @param items 35 | * The items to be added to the string. 36 | * @return string representation of list. format: a, b, c 37 | */ 38 | public static String listToString(List items) { 39 | StringBuilder str = new StringBuilder(); 40 | for (Object item : items) { 41 | if (!str.toString().equals("")) { 42 | str.append(", "); 43 | } 44 | str.append(item.toString()); 45 | } 46 | return str.toString(); 47 | } 48 | 49 | /** 50 | * @param value 51 | * The value to be converted 52 | * @return int value of boolean, true=1 false=0 53 | */ 54 | public static int booleanToInt(boolean value) { 55 | if (value) { 56 | return 1; 57 | } else { 58 | return 0; 59 | } 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/configuration/CrawlScope.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | /** 4 | * The crawl scope allows to check if a URL is or not in scope. 5 | *

6 | * URLs in scope are crawled during the crawling process. 7 | * 8 | * @since 3.7 9 | */ 10 | @FunctionalInterface 11 | public interface CrawlScope { 12 | 13 | /** 14 | * Tells whether or not the given URL is in scope. 15 | *

16 | * Called during the crawl process, to know if the crawling process should crawl or backtrack. 17 | * 18 | * @param url 19 | * the URL to check if it's in scope. 20 | * @return {@code true} if the given URL is in scope, {@code false} otherwise. 21 | */ 22 | boolean isInScope(String url); 23 | } 24 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/configuration/DefaultCrawlScope.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | import java.net.URI; 4 | import java.util.Objects; 5 | 6 | import com.crawljax.util.UrlUtils; 7 | import com.google.common.base.MoreObjects; 8 | import com.google.common.base.Preconditions; 9 | 10 | /** 11 | * A {@code CrawlScope} that allows to crawl only under a given domain. 12 | * 13 | * @since 3.7 14 | */ 15 | public class DefaultCrawlScope implements CrawlScope { 16 | 17 | private URI url; 18 | 19 | /** 20 | * Constructs a {@code DefaultCrawlScope} with the given URL. 21 | * 22 | * @param url 23 | * the URL with allowed domain, must not be {@code null}. 24 | */ 25 | public DefaultCrawlScope(URI url) { 26 | Preconditions.checkNotNull(url); 27 | this.url = url; 28 | } 29 | 30 | /** 31 | * Gets the URL used for scope check. 32 | * 33 | * @return the URL used for scope check. 
34 | */ 35 | public URI getUrl() { 36 | return url; 37 | } 38 | 39 | @Override 40 | public boolean isInScope(String url) { 41 | return UrlUtils.isSameDomain(url, this.url); 42 | } 43 | 44 | @Override 45 | public int hashCode() { 46 | return Objects.hash(url); 47 | } 48 | 49 | @Override 50 | public boolean equals(Object object) { 51 | if (object instanceof DefaultCrawlScope) { 52 | DefaultCrawlScope that = (DefaultCrawlScope) object; 53 | return Objects.equals(this.url, that.url); 54 | } 55 | return false; 56 | } 57 | 58 | @Override 59 | public String toString() { 60 | return MoreObjects.toStringHelper(this).add("url", url).toString(); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/configuration/DefaultUnexpectedAlertHandler.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | import org.openqa.selenium.WebDriver; 4 | 5 | /** 6 | * An {@link UnexpectedAlertHandler} that does not handle the alert and always retries the action. 7 | *

8 | * Should be used only if the alerts are automatically handled (accepted or dismissed) by the 9 | * selected {@code WebDriver}, for example, through the capability 10 | * {@link org.openqa.selenium.remote.CapabilityType#UNHANDLED_PROMPT_BEHAVIOUR 11 | * CapabilityType.UNHANDLED_PROMPT_BEHAVIOUR}. 12 | * 13 | * @since 3.8 14 | * @see #INSTANCE 15 | */ 16 | public class DefaultUnexpectedAlertHandler implements UnexpectedAlertHandler { 17 | 18 | /** 19 | * The instance of {@code DefaultUnexpectedAlertHandler}. 20 | */ 21 | public static final DefaultUnexpectedAlertHandler INSTANCE = 22 | new DefaultUnexpectedAlertHandler(); 23 | 24 | @Override 25 | public boolean handleAlert(WebDriver browser, String alertText) { 26 | return true; 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/configuration/FormAction.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | import com.crawljax.core.state.Eventable.EventType; 4 | 5 | /** 6 | * Represents a form action, e.g. a link that is clicked that handles the form NOTE: In general 7 | * FormAction is not designed to be instantiated directly. 
8 | * 9 | * @author DannyRoest@gmail.com (Danny Roest) 10 | */ 11 | public class FormAction { 12 | 13 | private CrawlElement crawlElement; 14 | 15 | /** 16 | * @param tagName 17 | * the tag name of the element 18 | * @return this CrawlElement 19 | */ 20 | public CrawlElement beforeClickElement(String tagName) { 21 | this.crawlElement = new CrawlElement(EventType.click, tagName); 22 | return crawlElement; 23 | } 24 | 25 | /** 26 | * @return the crawlTag 27 | */ 28 | protected CrawlElement getCrawlElement() { 29 | return crawlElement; 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/configuration/IgnoreFrameChecker.java: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 2 | 3 | package com.crawljax.core.configuration; 4 | 5 | /** 6 | * This interface is used to reflect to operation to see if a given frame must 7 | * be ignored. 8 | * 9 | * @author Stefan Lenselink <slenselink@google.com> 10 | */ 11 | public interface IgnoreFrameChecker { 12 | 13 | /** 14 | * Must a given frame identifier be ignored? 15 | * 16 | * @param frameId 17 | * the frame identifier 18 | * @return true if the specified frame identifier must be ignored 19 | */ 20 | boolean isFrameIgnored(String frameId); 21 | } -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/configuration/UnexpectedAlertHandler.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | import org.openqa.selenium.UnhandledAlertException; 4 | import org.openqa.selenium.WebDriver; 5 | 6 | /** 7 | * A handler for unexpected/unhandled alerts ({@link UnhandledAlertException}). 8 | *

9 | * Allows {@link com.crawljax.browser.WebDriverBackedEmbeddedBrowser WebDriverBackedEmbeddedBrowser} 10 | * to handle unexpected/unhandled alerts when trying to execute browser actions, to continue or not 11 | * with the normal crawling process. 12 | * 13 | * @since 3.8 14 | */ 15 | @FunctionalInterface 16 | public interface UnexpectedAlertHandler { 17 | 18 | /** 19 | * Handles the unexpected/unhandled alert and tells whether or not the browser action should be 20 | * retried. 21 | *

22 |      * Called when an {@code UnhandledAlertException} is caught after trying to execute a browser
23 |      * action.
24 |      *
25 |      * @param browser
26 |      *            the browser that was executing the action.
27 |      * @param alertText
28 |      *            the text/message of the alert.
29 |      * @return {@code true} if the action should be retried, {@code false} otherwise.
30 |      */
31 |     boolean handleAlert(WebDriver browser, String alertText);
32 | }
33 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/exception/BrowserConnectionException.java:
--------------------------------------------------------------------------------
1 | // Copyright 2010 Google Inc. All Rights Reserved.
2 |
3 | package com.crawljax.core.exception;
4 |
5 | import org.openqa.selenium.WebDriverException;
6 |
7 | /**
8 |  * This {@link RuntimeException} is thrown when an EmbeddedBrowser has lost the connection to its
9 |  * underlying implementation and has therefore crashed.
10 |  *
11 |  * @author slenselink@google.com (Stefan Lenselink)
12 |  */
13 | public class BrowserConnectionException extends RuntimeException {
14 |
15 |     /**
16 |      * Generated serial version UID
17 |      */
18 |     private static final long serialVersionUID = -5149214539340150056L;
19 |
20 |     /**
21 |      * Create a new BrowserConnectionException wrapping a previously caught {@link WebDriverException}.
22 |      *
23 |      * @param exception
24 |      *            the original exception to wrap; it becomes the cause of this exception.
25 |      */
26 |     public BrowserConnectionException(WebDriverException exception) {
27 |         super(exception);
28 |     }
29 | }
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/exception/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * This is the exception package, containing the Exceptions used by Crawljax.
3 |  */
4 | package com.crawljax.core.exception;
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/DomChangeNotifierPlugin.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * A plugin interface to provide an extension point for comparing the current state with the new
3 |  * state induced after firing the event. Note that if you add more than one instance of this type of
4 |  * plugin to crawljax, only the last added instance will be used for performing the DOM comparison
5 |  * and all others will be ignored.
6 |  */
7 | package com.crawljax.core.plugin;
8 |
9 | import com.crawljax.core.CrawlerContext;
10 | import com.crawljax.core.state.Eventable;
11 |
12 | /**
13 |  * This plugin lets you override the default state comparison that Crawljax uses.
14 |  *
15 |  * @deprecated Although new states are selected based on this plugin, the actual state comparison used by the
16 |  * backing StateFlowGraph uses the {@link Object#hashCode()} and {@link Object#equals(Object)} functions of the
17 |  * {@link com.crawljax.core.state.StateVertex}. To implement correct behaviour, do not use this class but specify a
18 |  * custom {@link com.crawljax.core.state.StateVertexFactory} in the
19 |  * {@link com.crawljax.core.configuration.CrawljaxConfiguration}. This interface will be removed in Crawljax 4.x
20 |  */
21 | @Deprecated
22 | public interface DomChangeNotifierPlugin extends Plugin {
23 |
24 |     /**
25 |      * Check to see if the (new) DOM is changed with regards to the old DOM.
26 |      * <p>
27 |      * This method can be called from multiple threads with different {@link CrawlerContext}
28 |      * </p>
29 |      *
30 |      * @param context The Crawler context.
31 |      * @param domBefore the state before the event.
32 |      * @param e the event ({@link Eventable}) that was fired between the two states.
33 |      * @param domAfter the state after the event.
34 |      * @return true if the state is changed according to the compare method of the oracle.
35 |      * @deprecated See class documentation. This method will be removed in Crawljax 4.x
36 |      */
37 |     @Deprecated
38 |     boolean isDomChanged(CrawlerContext context, String domBefore, Eventable e, String domAfter);
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/GeneratesOutput.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | /**
4 |  * Interface that adds a setOutputFolder and getOutputFolder method to allow the user to set the
5 |  * directory this plugin should use for its output. Note that the output path should be used as an
6 |  * absolute path.
7 |  */
8 | public interface GeneratesOutput {
9 |
10 |     /**
11 |      * Sets the absolute output directory that should be used by this plugin.
12 |      *
13 |      * @param absolutePath
14 |      *            The path to the output directory to use.
15 |      */
16 |     void setOutputFolder(String absolutePath);
17 |
18 |     /**
19 |      * Get the absolute path of the output directory that was specified by the user.
20 |      *
21 |      * @return The path to use for writing files to.
22 | */ 23 | String getOutputFolder(); 24 | } 25 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/HostInterface.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import java.io.File; 4 | import java.util.Map; 5 | 6 | public interface HostInterface { 7 | 8 | public File getOutputDirectory(); 9 | public Map getParameters(); 10 | } 11 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/HostInterfaceImpl.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import java.io.File; 4 | import java.util.Map; 5 | 6 | public class HostInterfaceImpl implements HostInterface { 7 | 8 | private File outputDirectory; 9 | private Map parameters; 10 | 11 | public HostInterfaceImpl(File outputDirectory, Map parameters) { 12 | this.outputDirectory = outputDirectory; 13 | this.parameters = parameters; 14 | } 15 | 16 | @Override 17 | public File getOutputDirectory() { 18 | return outputDirectory; 19 | } 20 | 21 | public void setOutputDirectory(File outputDirectory) { 22 | this.outputDirectory = outputDirectory; 23 | } 24 | 25 | @Override 26 | public Map getParameters() { 27 | return parameters; 28 | } 29 | 30 | public void setParameters(Map parameters) { 31 | this.parameters = parameters; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/OnBrowserCreatedPlugin.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import com.crawljax.browser.EmbeddedBrowser; 4 | 5 | /** 6 | * This interface denotes the Plugin type that is executed everytime when a new 7 | * Browser is created. 
This can be used to do login, database changes, 8 | * statistics etc. everytime a new browser is started. 9 | * 10 | * @author Stefan Lenselink <S.R.Lenselink@student.tudelft.nl> 11 | */ 12 | public interface OnBrowserCreatedPlugin extends Plugin { 13 | 14 | /** 15 | * This method is executed when a new browser has been created and ready to 16 | * be used by the Crawler. The PreCrawling plugins are executed before these 17 | * plugins are executed except that the precrawling plugins are only 18 | * executed on the first created browser. while this plugin is executed on 19 | * every new browser. 20 | * 21 | * @param newBrowser 22 | * the new created browser object 23 | */ 24 | void onBrowserCreated(EmbeddedBrowser newBrowser); 25 | 26 | } 27 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/OnFireEventFailedPlugin.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import java.util.List; 4 | 5 | import com.crawljax.core.CrawlerContext; 6 | import com.crawljax.core.state.Eventable; 7 | 8 | /** 9 | * Plugin type that is called every time event that was requested to fire failed firing. 10 | */ 11 | public interface OnFireEventFailedPlugin extends Plugin { 12 | 13 | /** 14 | * Method that is called when an event that was requested to fire failed firing. 15 | *

16 | * This method can be called from multiple threads with different {@link CrawlerContext} 17 | *

18 | * 19 | * @param context 20 | * The per crawler context. 21 | * @param eventable 22 | * the eventable that failed to execute 23 | * @param pathToFailure 24 | * the list of eventable lead TO this failed eventable, the eventable excluded. 25 | */ 26 | void onFireEventFailed(CrawlerContext context, Eventable eventable, 27 | List pathToFailure); 28 | 29 | } 30 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/OnInvariantViolationPlugin.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import com.crawljax.condition.invariant.Invariant; 4 | import com.crawljax.core.CrawlerContext; 5 | 6 | /** 7 | * Plugin type that is called every time an invariant is violated. Invariants are checked after each 8 | * detected state change. 9 | */ 10 | public interface OnInvariantViolationPlugin extends Plugin { 11 | 12 | /** 13 | * Method that is called when an invariant is violated. 14 | *

15 | * This method can be called from multiple threads with different {@link CrawlerContext} 16 | *

17 | * 18 | * @param invariant 19 | * the failed invariant. 20 | * @param context 21 | * the browsers context 22 | */ 23 | void onInvariantViolation(Invariant invariant, CrawlerContext context); 24 | 25 | } 26 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/OnNewStatePlugin.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import com.crawljax.core.CrawlerContext; 4 | import com.crawljax.core.state.StateFlowGraph; 5 | import com.crawljax.core.state.StateVertex; 6 | 7 | /** 8 | * Plugin type that is called every time a new state is found by Crawljax. This also happens for the 9 | * Index State. Example: DOM validation. 10 | */ 11 | public interface OnNewStatePlugin extends Plugin { 12 | 13 | /** 14 | * Method that is called when a new state is found. When this method is called the state is 15 | * already added to the {@link StateFlowGraph}. 16 | *

17 | * This method can be called from multiple threads with different {@link CrawlerContext} 18 | *

19 | * 20 | * @param context 21 | * the current context. 22 | * @param newState 23 | * The new state. Equivalent to {@link CrawlerContext#getCurrentState()}. 24 | */ 25 | void onNewState(CrawlerContext context, StateVertex newState); 26 | 27 | } 28 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/OnRevisitStatePlugin.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import com.crawljax.core.CrawlerContext; 4 | import com.crawljax.core.state.StateVertex; 5 | 6 | /** 7 | * Plugin type that is called every time a state is revisited by Crawljax. Example: Benchmarking. 8 | * This plugin needs an explicit current state because the session.getCurrentState() does not 9 | * contain the correct current state since we are in back-tracking phase. 10 | */ 11 | public interface OnRevisitStatePlugin extends Plugin { 12 | 13 | /** 14 | * Method that is called every time a state is revisited by Crawljax. Warning: changing the 15 | * state can influence crawljax, it is not a copy. 16 | *

17 | * This method can be called from multiple threads with different {@link CrawlerContext} 18 | *

19 | * 20 | * @param context 21 | * the crawlSession 22 | * @param currentState 23 | * the state revisited 24 | */ 25 | void onRevisitState(CrawlerContext context, StateVertex currentState); 26 | 27 | } 28 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/OnUrlLoadPlugin.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import com.crawljax.core.CrawlerContext; 4 | 5 | /** 6 | * Plugin type that is called after the initial URL is (re)loaded. Example: refreshing the page 7 | * (clear the browser cache). The OnURLloadPlugins are run just after the Browser has gone to the 8 | * initial URL. Not only the first time but also every time the Core navigates back (back-tracking). 9 | */ 10 | public interface OnUrlLoadPlugin extends Plugin { 11 | 12 | /** 13 | * Method that is called after the url is (re) loaded. Warning: changing the browser can change 14 | * the behaviour of Crawljax. It is not a copy! 15 | *

16 | * This method can be called from multiple threads with different {@link CrawlerContext} 17 | *

18 |      *
19 |      * @param context
20 |      *            the current crawler context.
21 |      */
22 |     void onUrlLoad(CrawlerContext context);
23 |
24 | }
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/Plugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | /**
4 |  * Main interface for all types of plugins, there are 8 different types of
5 |  * plugins.
6 |  *
7 |  * <table>
8 |  * <tr>
9 |  * <th>Type</th>
10 |  * <th>Executed</th>
11 |  * <th>Examples</th>
12 |  * </tr>
13 |  * <tr>
14 |  * <td>OnNewStatePlugin</td>
15 |  * <td>When a new state is found while crawling</td>
16 |  * <td>Create Screenshots, Validate DOM</td>
17 |  * </tr>
18 |  * <tr>
19 |  * <td>OnRevisitStatePlugin</td>
20 |  * <td>When a state is revisited</td>
21 |  * <td>Crawljax benchmarking</td>
22 |  * </tr>
23 |  * <tr>
24 |  * <td>OnUrlLoadPlugin</td>
25 |  * <td>After the initial URL is (re)loaded</td>
26 |  * <td>Reset back-end state</td>
27 |  * </tr>
28 |  * <tr>
29 |  * <td>OnInvariantViolationPlugin</td>
30 |  * <td>When an invariant fails validation</td>
31 |  * <td>Report builder</td>
32 |  * </tr>
33 |  * <tr>
34 |  * <td>PreStateCrawlingPlugin</td>
35 |  * <td>Before a new state is crawled</td>
36 |  * <td>Logging candidate elements</td>
37 |  * </tr>
38 |  * <tr>
39 |  * <td>PostCrawlingPlugin</td>
40 |  * <td>After the crawling</td>
41 |  * <td>Generating tests from the state machine</td>
42 |  * </tr>
43 |  * <tr>
44 |  * <td>ProxyServerPlugin</td>
45 |  * <td>Before the crawling, at the initialization of the core</td>
46 |  * <td>Loading a custom proxy configuration in the used browser</td>
47 |  * </tr>
48 |  * </table>
49 |  *
50 | */ 51 | public interface Plugin { 52 | 53 | } 54 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/PostCrawlingPlugin.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import com.crawljax.core.CrawlSession; 4 | import com.crawljax.core.ExitNotifier.ExitStatus; 5 | 6 | /** 7 | * Plugin type that is called after the crawling phase is finished. Examples: report generation, 8 | * test generation 9 | */ 10 | public interface PostCrawlingPlugin extends Plugin { 11 | 12 | /** 13 | * Method that is called after the crawling is finished. Warning: changing the session can 14 | * change the behavior of other post crawl plugins. It is not a copy! 15 | * 16 | * @param session 17 | * the crawl session. 18 | * @param exitReason 19 | * The {@link ExitStatus} Crawljax stopped. 20 | */ 21 | void postCrawling(CrawlSession session, ExitStatus exitReason); 22 | 23 | } 24 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/PreCrawlingPlugin.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import java.net.URL; 4 | 5 | import com.crawljax.core.configuration.CrawljaxConfiguration; 6 | 7 | /** 8 | * {@link Plugin} that is called before the crawling starts and before the initial URL has been 9 | * loaded. This kind of plugins can be used to do for example 'once in a crawlsession' operations 10 | * like logging in a web application or reset the database to a 'clean' state. 11 | */ 12 | public interface PreCrawlingPlugin extends Plugin { 13 | 14 | /** 15 | * Method that is called before Crawljax loads the initial {@link URL} and before the core 16 | * starts crawling. 17 | * 18 | * @param config 19 | * The {@link CrawljaxConfiguration} for the coming crawl. 
20 | */ 21 | void preCrawling(CrawljaxConfiguration config) throws RuntimeException; 22 | } 23 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/plugin/PreStateCrawlingPlugin.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.plugin; 2 | 3 | import com.crawljax.core.CandidateElement; 4 | import com.crawljax.core.CrawlerContext; 5 | import com.crawljax.core.state.StateVertex; 6 | import com.google.common.collect.ImmutableList; 7 | 8 | /** 9 | * Plugin type that is called before firing events on the current DOM state. 10 | */ 11 | public interface PreStateCrawlingPlugin extends Plugin { 12 | 13 | /** 14 | * Method that is called before firing events on the current DOM state. Warning the session and 15 | * candidateElements are not clones, changes will result in changed behavior. 16 | *

17 | * This method can be called from multiple threads with different {@link CrawlerContext} 18 | *

19 | * 20 | * @param context 21 | * the current session data. 22 | * @param candidateElements 23 | * the candidates for the current state. 24 | * @param state 25 | * The state being crawled 26 | */ 27 | void preStateCrawling(CrawlerContext context, 28 | ImmutableList candidateElements, StateVertex state); 29 | 30 | } 31 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/state/DefaultStateVertexFactory.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.state; 2 | 3 | /** 4 | * The default factory that creates State vertexes with a {@link Object#hashCode()} and {@link Object#equals(Object)} 5 | * function based on the Stripped dom. 6 | */ 7 | public class DefaultStateVertexFactory extends StateVertexFactory { 8 | 9 | @Override 10 | public StateVertex newStateVertex(int id, String url, String name, String dom, String strippedDom) { 11 | return new StateVertexImpl(id, url, name, dom, strippedDom); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/core/state/StateVertexFactory.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.state; 2 | 3 | /** 4 | * A factory that creates a {@link com.crawljax.core.state.StateVertex}. This factory can be implemented 5 | * if you want to use custom states that use a different {@link Object#hashCode()} or {@link Object#equals(Object)} 6 | * method. 7 | */ 8 | public abstract class StateVertexFactory { 9 | 10 | /** 11 | * Defines a State. 
12 |      *
13 |      * @param id the id of the state
14 |      * @param url the current url of the state
15 |      * @param name the name of the state
16 |      * @param dom the current DOM tree of the browser
17 |      * @param strippedDom the stripped dom by the OracleComparators
18 |      * @return the {@link StateVertex} created for the given values
19 |      */
20 |     public abstract StateVertex newStateVertex(int id, String url, String name, String dom, String strippedDom);
21 |
22 |
23 |     /**
24 |      * Creates the index state by delegating to
25 |      * {@link #newStateVertex(int, String, String, String, String)} with
26 |      * {@link StateVertex#INDEX_ID} as id and {@code "index"} as name.
27 |      *
28 |      * @param url the url of the index state
29 |      * @param dom the DOM of the index state
30 |      * @param strippedDom the stripped DOM of the index state
31 |      * @return The index {@link StateVertex}.
32 |      */
33 |     public StateVertex createIndex(String url, String dom, String strippedDom) {
34 |         return newStateVertex(StateVertex.INDEX_ID, url, "index", dom, strippedDom);
35 |     }
36 | }
37 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/forms/InputValue.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.forms;
2 |
3 | /**
4 |  * Value for a FormInput.
5 |  *
6 |  * @author dannyroest@gmail.com (Danny Roest)
7 |  */
8 | public class InputValue {
9 |
10 |     private long id;
11 |     private String value;
12 |     private boolean checked = false;
13 |
14 |     /**
15 |      * Default constructor.
16 |      */
17 |     public InputValue() {
18 |
19 |     }
20 |
21 |     /**
22 |      * @param value
23 |      *            the text value
24 |      */
25 |     public InputValue(String value) {
26 |         this(value, true);
27 |     }
28 |
29 |     /**
30 |      * Created a form input value.
31 | * 32 | * @param value 33 | * the text value 34 | * @param checked 35 | * whether the element should be checked 36 | */ 37 | public InputValue(String value, boolean checked) { 38 | this.value = value; 39 | this.checked = checked; 40 | } 41 | 42 | @Override 43 | public String toString() { 44 | return getValue(); 45 | } 46 | 47 | /** 48 | * @return the id 49 | */ 50 | public long getId() { 51 | return id; 52 | } 53 | 54 | /** 55 | * @param id 56 | * the id to set 57 | */ 58 | public void setId(long id) { 59 | this.id = id; 60 | } 61 | 62 | /** 63 | * @return the value 64 | */ 65 | public String getValue() { 66 | return value; 67 | } 68 | 69 | /** 70 | * @param value 71 | * the value to set 72 | */ 73 | public void setValue(String value) { 74 | this.value = value; 75 | } 76 | 77 | /** 78 | * @return the checked 79 | */ 80 | public boolean isChecked() { 81 | return checked; 82 | } 83 | 84 | /** 85 | * @param checked 86 | * the checked to set 87 | */ 88 | public void setChecked(boolean checked) { 89 | this.checked = checked; 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/forms/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Forms package. 3 | */ 4 | package com.crawljax.forms; -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/metrics/MetricsModule.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.metrics; 2 | 3 | import com.codahale.metrics.Metric; 4 | import com.codahale.metrics.MetricRegistry; 5 | import com.google.inject.AbstractModule; 6 | import com.google.inject.Module; 7 | 8 | /** 9 | * The module used for setting up Metrics. 10 | */ 11 | public class MetricsModule extends AbstractModule implements Module { 12 | 13 | /** 14 | * The prefix for a {@link Metric} concerning Crawljax. 
15 | */ 16 | public static final String CRAWL_PREFIX = "com.crawljax.crawl"; 17 | 18 | /** 19 | * The prefix for a {@link Metric} concerning the events during a crawl. 20 | */ 21 | public static final String EVENTS_PREFIX = CRAWL_PREFIX + "events."; 22 | 23 | /** 24 | * The prefix for a {@link Metric} concerning the plugins. 25 | */ 26 | public static final String PLUGINS_PREFIX = CRAWL_PREFIX + "plugins."; 27 | 28 | @Override 29 | protected void configure() { 30 | bind(MetricRegistry.class).asEagerSingleton(); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/oraclecomparator/AbstractComparator.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.oraclecomparator; 2 | 3 | import java.util.List; 4 | 5 | import net.jcip.annotations.NotThreadSafe; 6 | 7 | import org.custommonkey.xmlunit.Difference; 8 | 9 | import com.crawljax.util.DomUtils; 10 | 11 | /** 12 | * The Abstract base class of all the Comparators. All comparators are not Thread safe as 13 | * comparators are shared between Threads and the origionalDom and newDom can not be final. 14 | */ 15 | @NotThreadSafe 16 | public abstract class AbstractComparator implements Comparator { 17 | 18 | @Override 19 | public List getDifferences(String oldDom, String newDom) { 20 | return DomUtils.getDifferences(normalize(oldDom), normalize(newDom)); 21 | } 22 | 23 | @Override 24 | public boolean isEquivalent(String oldDom, String newDom) { 25 | boolean equivalent = false; 26 | if (StateComparator.COMPARE_IGNORE_CASE) { 27 | equivalent = normalize(oldDom).equalsIgnoreCase(normalize(newDom)); 28 | } else { 29 | equivalent = normalize(oldDom).equals(normalize(newDom)); 30 | } 31 | return equivalent; 32 | } 33 | 34 | /** 35 | * Override this method to apply normalization to the comparison. 36 | * 37 | * @param dom 38 | * The original DOM 39 | * @return the normalized DOM. 
40 | */ 41 | @Override 42 | public String normalize(String dom) { 43 | return dom; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/oraclecomparator/Comparator.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.oraclecomparator; 2 | 3 | import java.util.List; 4 | 5 | import net.jcip.annotations.NotThreadSafe; 6 | 7 | import org.custommonkey.xmlunit.Difference; 8 | 9 | /** 10 | * Interface for oracle comparators. 11 | */ 12 | @NotThreadSafe 13 | public interface Comparator { 14 | 15 | /** 16 | * @return The differences between the two DOMs 17 | */ 18 | List getDifferences(String oldDom, String newDom); 19 | 20 | /** 21 | * @return if the originalDom and the newDom are equivalent 22 | */ 23 | boolean isEquivalent(String oldDom, String newDom); 24 | 25 | /** 26 | * @return The normalized DOM, on which the comparison is made. 27 | */ 28 | String normalize(String dom); 29 | 30 | } 31 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/oraclecomparator/comparators/AttributeComparator.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.oraclecomparator.comparators; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import com.crawljax.oraclecomparator.AbstractComparator; 7 | 8 | /** 9 | * Oracle Comparator that ignores the specified attributes. 10 | */ 11 | public class AttributeComparator extends AbstractComparator { 12 | 13 | private final List ignoreAttributes = new ArrayList(); 14 | 15 | /** 16 | * @param attributes 17 | * the attributes to ignore 18 | */ 19 | public AttributeComparator(String... 
attributes) { 20 | for (String attribute : attributes) { 21 | ignoreAttributes.add(attribute); 22 | } 23 | } 24 | 25 | @Override 26 | public String normalize(String dom) { 27 | String strippedDom = dom; 28 | for (String attribute : ignoreAttributes) { 29 | String regExp = "\\s" + attribute + "=\"[^\"]*\""; 30 | strippedDom = strippedDom.replaceAll(regExp, ""); 31 | } 32 | return strippedDom; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/oraclecomparator/comparators/PlainStructureComparator.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.oraclecomparator.comparators; 2 | 3 | import com.crawljax.oraclecomparator.AbstractComparator; 4 | 5 | public class PlainStructureComparator extends AbstractComparator { 6 | 7 | private final boolean removeAttributes; 8 | 9 | public PlainStructureComparator() { 10 | this(true); 11 | } 12 | 13 | public PlainStructureComparator(boolean removeAttributes) { 14 | this.removeAttributes = removeAttributes; 15 | } 16 | 17 | @Override 18 | public String normalize(String dom) { 19 | String normalized = dom; 20 | if (removeAttributes) { 21 | normalized = stripAttributes(normalized); 22 | } 23 | return stripContent(normalized); 24 | } 25 | 26 | private String stripAttributes(String string) { 27 | String regExAttributes = "<(.+?)(\\s.*?)?(/)?>"; 28 | String ret = string.replaceAll(regExAttributes, "<$1$3>"); 29 | return ret; 30 | } 31 | 32 | private String stripContent(String string) { 33 | String strippedStr; 34 | 35 | // remove linebreaks 36 | strippedStr = string.replaceAll("[\\t\\n\\x0B\\f\\r]", ""); 37 | 38 | // remove content 39 | strippedStr = strippedStr.replaceAll(">(.*?)<", "><"); 40 | return strippedStr; 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- 
/core/src/main/java/com/crawljax/oraclecomparator/comparators/RegexComparator.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.oraclecomparator.comparators; 2 | 3 | import java.util.Collection; 4 | 5 | import com.crawljax.oraclecomparator.AbstractComparator; 6 | import com.crawljax.util.DomUtils; 7 | import com.google.common.collect.ImmutableList; 8 | 9 | /** 10 | * Regex oracles that strips content from the DOM to check whether the DOMs are equal without the 11 | * specified regular expressions. 12 | * 13 | * @author dannyroest@gmail.com (Danny Roest) 14 | */ 15 | public class RegexComparator extends AbstractComparator { 16 | 17 | // NOTE: the ordering can be important 18 | private final ImmutableList regexs; 19 | 20 | public RegexComparator(Collection regexs) { 21 | this.regexs = ImmutableList.copyOf(regexs); 22 | } 23 | 24 | public RegexComparator(String... regexs) { 25 | this.regexs = ImmutableList.copyOf(regexs); 26 | } 27 | 28 | @Override 29 | public String normalize(String dom) { 30 | String normalized = dom; 31 | for (String regex : regexs) { 32 | normalized = DomUtils.replaceString(normalized, regex, ""); 33 | } 34 | return normalized; 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/oraclecomparator/comparators/ScriptComparator.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.oraclecomparator.comparators; 2 | 3 | import java.io.IOException; 4 | 5 | import org.slf4j.Logger; 6 | import org.slf4j.LoggerFactory; 7 | import org.w3c.dom.Document; 8 | 9 | import com.crawljax.oraclecomparator.AbstractComparator; 10 | import com.crawljax.util.DomUtils; 11 | 12 | public class ScriptComparator extends AbstractComparator { 13 | 14 | private static final Logger LOGGER = LoggerFactory.getLogger(AbstractComparator.class 15 | .getName()); 16 | 17 | @Override 
18 | public String normalize(String dom) { 19 | Document orgDoc; 20 | try { 21 | orgDoc = DomUtils.asDocument(dom); 22 | orgDoc = DomUtils.removeScriptTags(orgDoc); 23 | return DomUtils.getDocumentToString(orgDoc); 24 | } catch (IOException e) { 25 | LOGGER.warn("Could not perform DOM comparison", e); 26 | return dom; 27 | } 28 | 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/oraclecomparator/comparators/SimpleComparator.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.oraclecomparator.comparators; 2 | 3 | import com.crawljax.oraclecomparator.AbstractComparator; 4 | 5 | /** 6 | * Simple oracle which can ignore whitespaces and linebreaks. 7 | */ 8 | public class SimpleComparator extends AbstractComparator { 9 | 10 | /** 11 | * Default argument less constructor. 12 | */ 13 | public SimpleComparator() { 14 | super(); 15 | } 16 | 17 | @Override 18 | public String normalize(String string) { 19 | String strippedStr; 20 | 21 | // remove linebreaks 22 | strippedStr = string.replaceAll("[\\t\\n\\x0B\\f\\r]", ""); 23 | 24 | // remove just before and after elements spaces 25 | strippedStr = strippedStr.replaceAll(">[ ]*", ">"); 26 | strippedStr = strippedStr.replaceAll("[ ]*<", "<"); 27 | 28 | return strippedStr; 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/oraclecomparator/comparators/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Oracles oracles package. 3 | */ 4 | package com.crawljax.oraclecomparator.comparators; -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/oraclecomparator/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Oracle package. 
3 | */ 4 | package com.crawljax.oraclecomparator; -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/util/DOMComparer.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.util; 2 | 3 | import java.util.List; 4 | 5 | import org.custommonkey.xmlunit.DetailedDiff; 6 | import org.custommonkey.xmlunit.Diff; 7 | import org.custommonkey.xmlunit.Difference; 8 | import org.w3c.dom.Document; 9 | 10 | /** 11 | * This class allows to compare two Document objects and save the differences in a list. 12 | * 13 | * @author beze232056 14 | */ 15 | public class DOMComparer { 16 | /** 17 | * The control document. This is used as a base to compare the testDOM with. 18 | */ 19 | private final Document controlDOM; 20 | 21 | /** 22 | * The test document. This is the document in which we want to detect differences. 23 | */ 24 | private final Document testDOM; 25 | 26 | /** 27 | * Constructor. 28 | * 29 | * @param controlDOM 30 | * The control DOM. 31 | * @param testDOM 32 | * The test DOM. 33 | */ 34 | public DOMComparer(Document controlDOM, Document testDOM) { 35 | this.controlDOM = controlDOM; 36 | this.testDOM = testDOM; 37 | } 38 | 39 | /** 40 | * Compare the controlDOM and testDOM and save and return the differences in a list. 
41 | * 42 | * @return list with differences 43 | */ 44 | @SuppressWarnings("unchecked") 45 | public List compare() { 46 | Diff diff = new Diff(this.controlDOM, this.testDOM); 47 | DetailedDiff detDiff = new DetailedDiff(diff); 48 | return detDiff.getAllDifferences(); 49 | } 50 | 51 | } -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/util/DomDifferenceListener.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.util; 2 | 3 | import java.util.List; 4 | 5 | import org.custommonkey.xmlunit.Difference; 6 | import org.custommonkey.xmlunit.DifferenceListener; 7 | import org.w3c.dom.Node; 8 | 9 | final class DomDifferenceListener implements DifferenceListener { 10 | private final List ignoreAttributes; 11 | 12 | DomDifferenceListener(List ignoreAttributes) { 13 | this.ignoreAttributes = ignoreAttributes; 14 | } 15 | 16 | @Override 17 | public void skippedComparison(Node control, Node test) { 18 | } 19 | 20 | @Override 21 | public int differenceFound(Difference difference) { 22 | if (difference.getControlNodeDetail() == null 23 | || difference.getControlNodeDetail().getNode() == null 24 | || difference.getTestNodeDetail() == null 25 | || difference.getTestNodeDetail().getNode() == null) { 26 | return RETURN_ACCEPT_DIFFERENCE; 27 | } 28 | if (ignoreAttributes.contains(difference.getTestNodeDetail().getNode() 29 | .getNodeName()) 30 | || ignoreAttributes.contains(difference.getControlNodeDetail() 31 | .getNode().getNodeName())) { 32 | return RETURN_IGNORE_DIFFERENCE_NODES_IDENTICAL; 33 | } 34 | return RETURN_ACCEPT_DIFFERENCE; 35 | } 36 | } -------------------------------------------------------------------------------- /core/src/main/java/com/crawljax/util/HtmlNamespace.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Created Aug 7, 2008 3 | */ 4 | package com.crawljax.util; 5 | 6 | import 
/**
 * XMLObject helper: stores an object in an XML file with {@link XMLEncoder} and reads it back
 * with {@link XMLDecoder}.
 */
public final class XMLObject {

    private XMLObject() {

    }

    /**
     * Converts an object to an XML file.
     *
     * @param object
     *            The object to convert.
     * @param fname
     *            The filename where to save it to.
     * @throws FileNotFoundException
     *             If the file cannot be opened for writing.
     */
    public static void objectToXML(Object object, String fname) throws FileNotFoundException {
        XMLEncoder encoder = new XMLEncoder(new FileOutputStream(fname));
        try {
            encoder.writeObject(object);
        } finally {
            // Always release the file, even when writing fails; closing the encoder also
            // closes the stream it wraps.
            encoder.close();
        }
    }

    /**
     * Converts an XML file to an object.
     * <p>
     * NOTE(review): {@link XMLDecoder} instantiates classes and invokes methods named in the XML
     * document, so this must only be used on files from a trusted source.
     *
     * @param fname
     *            The filename to read the object from.
     * @throws FileNotFoundException
     *             If the file cannot be opened for reading.
     * @return The first object stored in the file.
     */
    public static Object xmlToObject(String fname) throws FileNotFoundException {
        XMLDecoder decoder = new XMLDecoder(new FileInputStream(fname));
        try {
            return decoder.readObject();
        } finally {
            // readObject() throws an unchecked exception when the file holds no object; the
            // file must be released in that case too.
            decoder.close();
        }
    }

}
3 | */ 4 | package com.crawljax.util; 5 | 6 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/browser/BrowserClosesDownloadPopUp.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.browser; 2 | 3 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasStates; 4 | import static org.junit.Assert.assertThat; 5 | 6 | import org.eclipse.jetty.util.resource.Resource; 7 | import org.junit.Test; 8 | 9 | import com.crawljax.core.CrawlSession; 10 | import com.crawljax.test.BaseCrawler; 11 | import com.crawljax.test.BrowserTest; 12 | import org.junit.experimental.categories.Category; 13 | 14 | @Category(BrowserTest.class) 15 | public class BrowserClosesDownloadPopUp { 16 | 17 | @Test 18 | public void webBrowserWindowOpensItIsIgnored() { 19 | BaseCrawler crawler = 20 | new BaseCrawler(Resource.newClassPathResource("/site"), "download/download.html"); 21 | CrawlSession crawl = crawler.crawl(); 22 | assertThat(crawl.getStateFlowGraph(), hasStates(2)); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/browser/ChromeProxyConfig.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.browser; 2 | 3 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasStates; 4 | import static org.junit.Assert.assertThat; 5 | 6 | import org.eclipse.jetty.util.resource.Resource; 7 | import org.junit.Test; 8 | import org.junit.experimental.categories.Category; 9 | 10 | import com.crawljax.browser.EmbeddedBrowser.BrowserType; 11 | import com.crawljax.core.CrawlSession; 12 | import com.crawljax.core.configuration.BrowserConfiguration; 13 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; 14 | import com.crawljax.test.BaseCrawler; 15 | import 
com.crawljax.test.BrowserTest; 16 | import com.crawljax.test.Utils; 17 | 18 | @Category(BrowserTest.class) 19 | public class ChromeProxyConfig { 20 | 21 | @Test 22 | public void chromeProxyConfig() throws Exception { 23 | Utils.assumeBinary("webdriver.chrome.driver", "chromedriver"); 24 | 25 | CrawlSession crawl = 26 | new BaseCrawler(Resource.newClassPathResource("/site"), 27 | "simplelink/simplelink.html") { 28 | @Override 29 | public CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() { 30 | CrawljaxConfigurationBuilder builder = 31 | super.newCrawlConfigurationBuilder(); 32 | builder.setBrowserConfig(new BrowserConfiguration(BrowserType.CHROME)); 33 | return builder; 34 | } 35 | }.crawl(); 36 | assertThat(crawl.getStateFlowGraph(), hasStates(2)); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/condition/BrowserDoesntLeaveUrlTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.condition; 2 | 3 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasStates; 4 | import static org.junit.Assert.assertThat; 5 | 6 | import org.junit.Test; 7 | import org.junit.experimental.categories.Category; 8 | 9 | import com.crawljax.core.CrawlSession; 10 | import com.crawljax.test.BaseCrawler; 11 | import com.crawljax.test.BrowserTest; 12 | 13 | @Category(BrowserTest.class) 14 | public class BrowserDoesntLeaveUrlTest { 15 | 16 | @Test 17 | public void whenJavaScriptNavigatesAwayFromPageItIsBlocked() throws Exception { 18 | BaseCrawler crawler = new BaseCrawler("navigate_other_urls.html"); 19 | // crawler.showWebSite(); 20 | CrawlSession session = crawler.crawl(); 21 | assertThat(session.getStateFlowGraph(), hasStates(5)); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/ExitNotifierTest.java: 
-------------------------------------------------------------------------------- 1 | package com.crawljax.core; 2 | 3 | import static org.hamcrest.core.Is.is; 4 | import static org.junit.Assert.assertThat; 5 | 6 | import org.junit.Test; 7 | 8 | import com.crawljax.core.ExitNotifier.ExitStatus; 9 | 10 | public class ExitNotifierTest { 11 | 12 | private ExitNotifier notifier; 13 | 14 | @Test(timeout = 2000) 15 | public void whenMaximumStatesReachedItExists() throws InterruptedException { 16 | notifier = new ExitNotifier(2); 17 | notifier.incrementNumberOfStates(); 18 | notifier.incrementNumberOfStates(); 19 | ExitStatus reason = notifier.awaitTermination(); 20 | assertThat(reason, is(ExitStatus.MAX_STATES)); 21 | 22 | } 23 | 24 | @Test(timeout = 2000) 25 | public void whenNoStateLimitItDoesntTerminate() throws InterruptedException { 26 | notifier = new ExitNotifier(0); 27 | notifier.incrementNumberOfStates(); 28 | notifier.incrementNumberOfStates(); 29 | assertThat(notifier.isExitCalled(), is(false)); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/NestedFramesTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core; 2 | 3 | import com.crawljax.browser.BrowserProvider; 4 | import com.crawljax.test.BrowserTest; 5 | import com.crawljax.test.RunWithWebServer; 6 | import org.junit.ClassRule; 7 | import org.junit.Rule; 8 | import org.junit.Test; 9 | import org.junit.experimental.categories.Category; 10 | import org.openqa.selenium.By; 11 | import org.openqa.selenium.WebDriver; 12 | import org.openqa.selenium.WebElement; 13 | 14 | @Category(BrowserTest.class) 15 | public class NestedFramesTest { 16 | 17 | @ClassRule 18 | public static final RunWithWebServer SERVER = new RunWithWebServer("/site/iframe"); 19 | 20 | private WebDriver driver; 21 | 22 | @Rule 23 | public BrowserProvider provider = new BrowserProvider(); 
24 | 25 | @Test 26 | public void testNestedFramesIndex() { 27 | driver = provider.newBrowser(); 28 | driver.get(SERVER.getSiteUrl().toString()); 29 | 30 | driver.switchTo().frame(0); 31 | driver.switchTo().frame(0); 32 | 33 | WebElement button002 = driver.findElement(By.id("button002")); 34 | button002.click(); 35 | } 36 | 37 | 38 | } 39 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/PopUpTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core; 2 | 3 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasEdges; 4 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasStates; 5 | import static org.junit.Assert.assertThat; 6 | 7 | import java.util.concurrent.TimeUnit; 8 | 9 | import org.junit.ClassRule; 10 | import org.junit.Test; 11 | import org.junit.experimental.categories.Category; 12 | 13 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; 14 | import com.crawljax.test.BrowserTest; 15 | import com.crawljax.test.RunWithWebServer; 16 | 17 | @Category(BrowserTest.class) 18 | public class PopUpTest { 19 | 20 | @ClassRule 21 | public static final RunWithWebServer WEB_SERVER = new RunWithWebServer("site"); 22 | 23 | @Test 24 | public void testPopups() throws CrawljaxException { 25 | CrawljaxConfigurationBuilder builder = WEB_SERVER.newConfigBuilder("popup"); 26 | builder.setMaximumDepth(3); 27 | builder.crawlRules().click("a"); 28 | builder.crawlRules().waitAfterEvent(100, TimeUnit.MILLISECONDS); 29 | builder.crawlRules().waitAfterReloadUrl(100, TimeUnit.MILLISECONDS); 30 | CrawljaxRunner runner = new CrawljaxRunner(builder.build()); 31 | CrawlSession session = runner.call(); 32 | assertThat(session.getStateFlowGraph(), hasEdges(3)); 33 | assertThat(session.getStateFlowGraph(), hasStates(4)); 34 | } 35 | 36 | } 37 | 
-------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/configuration/CrawlActionsTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | import static com.crawljax.core.configuration.CrawlElementMatcher.withXpath; 4 | import static org.hamcrest.Matchers.containsInAnyOrder; 5 | import static org.hamcrest.collection.IsCollectionWithSize.hasSize; 6 | import static org.junit.Assert.assertThat; 7 | 8 | import java.util.List; 9 | 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | public class CrawlActionsTest { 14 | 15 | private CrawlActionsBuilder actions; 16 | 17 | @Before 18 | public void setup() { 19 | actions = new CrawlActionsBuilder(); 20 | } 21 | 22 | @Test 23 | public void testIncludesWork() { 24 | actions.click("a"); 25 | actions.click("b").underXPath("123"); 26 | actions.click("b").underXPath("sdfsdf"); 27 | List crawlElements = actions.build().getLeft(); 28 | assertThat(crawlElements, hasSize(3)); 29 | } 30 | 31 | @Test 32 | public void testExcludesWork() { 33 | actions.dontClick("a"); 34 | actions.dontClick("b").underXPath("123"); 35 | actions.dontClick("b").underXPath("sdfsdf"); 36 | List crawlElements = actions.build().getRight(); 37 | assertThat(crawlElements, hasSize(3)); 38 | } 39 | 40 | @Test 41 | @SuppressWarnings("unchecked") 42 | public void testExcludeParents() { 43 | actions.click("a"); 44 | actions.click("button"); 45 | actions.dontClickChildrenOf("b").withId("someId"); 46 | actions.dontClickChildrenOf("b").withClass("someClass"); 47 | List crawlElements = actions.build().getRight(); 48 | assertThat(crawlElements, hasSize(4)); 49 | assertThat( 50 | crawlElements, 51 | containsInAnyOrder(withXpath("//B[@id='someId']//*"), 52 | withXpath("//B[@id='someId']//*"), 53 | withXpath("//B[@class='someClass']//*"), 54 | withXpath("//B[@class='someClass']//*"))); 55 | } 56 | } 57 | 
-------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/configuration/CrawlElementMatcher.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | import static org.hamcrest.core.IsEqual.equalTo; 4 | 5 | import org.hamcrest.Factory; 6 | import org.hamcrest.FeatureMatcher; 7 | import org.hamcrest.Matcher; 8 | 9 | public class CrawlElementMatcher { 10 | 11 | /** 12 | * @param xPath 13 | * checks {@link CrawlElement#getWithXpathExpression()} 14 | * @return A {@link Matcher} that inspects if the number of edges. 15 | */ 16 | @Factory 17 | public static FeatureMatcher withXpath(String xPath) { 18 | return new FeatureMatcher(equalTo(xPath), 19 | "CrawlElement with xPath", "xPath") { 20 | 21 | @Override 22 | protected String featureValueOf(CrawlElement actual) { 23 | return actual.getWithXpathExpression(); 24 | } 25 | }; 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/configuration/DefaultCrawlScopeTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | import static org.hamcrest.core.Is.is; 4 | import static org.junit.Assert.assertThat; 5 | 6 | import java.net.URI; 7 | 8 | import org.junit.Test; 9 | 10 | public class DefaultCrawlScopeTest { 11 | 12 | private static final URI SEED = URI.create("http://localhost/"); 13 | 14 | @Test(expected = NullPointerException.class) 15 | public void nullSeedDomainIsNotAllowed() throws Exception { 16 | new DefaultCrawlScope((URI) null); 17 | } 18 | 19 | @Test 20 | public void defaultCrawlScopeShouldIncludeSeedDomain() throws Exception { 21 | CrawlScope defaultCrawlScope = new DefaultCrawlScope(SEED); 22 | assertThat(defaultCrawlScope.isInScope("http://localhost/in/scope"), is(true)); 23 | } 24 | 25 | @Test 26 | 
public void defaultCrawlScopeShouldNotIncludeNonSeedDomain() throws Exception { 27 | CrawlScope defaultCrawlScope = new DefaultCrawlScope(SEED); 28 | assertThat(defaultCrawlScope.isInScope("http://example.com/not/in/scope"), is(false)); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/configuration/UnderXPathTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasStates; 4 | import static org.junit.Assert.assertThat; 5 | 6 | import org.junit.ClassRule; 7 | import org.junit.Test; 8 | import org.junit.experimental.categories.Category; 9 | 10 | import com.crawljax.core.CrawlSession; 11 | import com.crawljax.core.CrawljaxRunner; 12 | import com.crawljax.core.configuration.CrawlRules.CrawlRulesBuilder; 13 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; 14 | import com.crawljax.test.BrowserTest; 15 | import com.crawljax.test.RunWithWebServer; 16 | 17 | /** 18 | * Test case for issue number 16: http://code.google.com/p/crawljax/issues/detail?id=16 19 | */ 20 | @Category(BrowserTest.class) 21 | public class UnderXPathTest { 22 | 23 | @ClassRule 24 | public static final RunWithWebServer SERVER = new RunWithWebServer("/site"); 25 | 26 | @Test 27 | public void testDontClickUnderXPath() throws Exception { 28 | CrawljaxConfigurationBuilder builder = SERVER.newConfigBuilder("underxpath.html"); 29 | builder.crawlRules().click("a"); 30 | builder.crawlRules().dontClick("a").underXPath("//A[@class=\"noClickClass\"]"); 31 | CrawlRulesBuilder rules = builder.crawlRules(); 32 | rules.dontClick("a").withAttribute("id", "noClickId"); 33 | rules.dontClickChildrenOf("div").withClass("noChildrenOfClass"); 34 | rules.dontClickChildrenOf("div").withId("noChildrenOfId"); 35 | 36 | CrawlSession session = 
new CrawljaxRunner(builder.build()).call(); 37 | 38 | assertThat(session.getStateFlowGraph(), hasStates(2)); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/configuration/XPathEscapeApostropheTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.configuration; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import org.junit.Before; 6 | import org.junit.Test; 7 | 8 | import com.crawljax.core.state.Eventable.EventType; 9 | 10 | public class XPathEscapeApostropheTest { 11 | 12 | private CrawlElement element; 13 | 14 | @Before 15 | public void setup() { 16 | element = new CrawlElement(EventType.click, "button"); 17 | } 18 | 19 | @Test 20 | public void testStringNoApostrophes() { 21 | String test = "Test String"; 22 | test = element.escapeApostrophes(test); 23 | assertEquals("'Test String'", test); 24 | } 25 | 26 | @Test 27 | public void testStringConcat() { 28 | String test = "I'm Feeling Lucky"; 29 | test = element.escapeApostrophes(test); 30 | assertEquals("concat('I',\"'\",'m Feeling Lucky')", test); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/largetests/LargeChromeTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.largetests; 2 | 3 | import com.crawljax.browser.EmbeddedBrowser.BrowserType; 4 | import com.crawljax.core.CrawlSession; 5 | import com.crawljax.core.configuration.BrowserConfiguration; 6 | import com.crawljax.test.BrowserTest; 7 | import com.crawljax.test.Utils; 8 | 9 | import org.junit.BeforeClass; 10 | import org.junit.experimental.categories.Category; 11 | 12 | @Category(BrowserTest.class) 13 | public class LargeChromeTest extends LargeTestBase { 14 | 15 | private static CrawlSession session; 16 | 17 | 
@BeforeClass 18 | public static void setUpBeforeClass() throws Exception { 19 | Utils.assumeBinary("webdriver.chrome.driver", "chromedriver"); 20 | 21 | session = setup(new BrowserConfiguration(BrowserType.CHROME), 100, 100); 22 | } 23 | 24 | @Override 25 | protected CrawlSession getSession() { 26 | return session; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/largetests/LargeFirefoxTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.largetests; 2 | 3 | import org.junit.BeforeClass; 4 | import org.junit.experimental.categories.Category; 5 | 6 | import com.crawljax.browser.EmbeddedBrowser.BrowserType; 7 | import com.crawljax.core.CrawlSession; 8 | import com.crawljax.core.configuration.BrowserConfiguration; 9 | import com.crawljax.test.BrowserTest; 10 | import com.crawljax.test.Utils; 11 | 12 | @Category(BrowserTest.class) 13 | public class LargeFirefoxTest extends LargeTestBase { 14 | 15 | private static CrawlSession session; 16 | 17 | @BeforeClass 18 | public static void setUpBeforeClass() throws Exception { 19 | Utils.assumeBinary("webdriver.gecko.driver", "geckodriver"); 20 | 21 | session = setup(new BrowserConfiguration(BrowserType.FIREFOX, 1), 200, 200); 22 | } 23 | 24 | @Override 25 | protected CrawlSession getSession() { 26 | return session; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/largetests/LargeIETest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.largetests; 2 | 3 | import static org.hamcrest.Matchers.containsString; 4 | import static org.junit.Assume.assumeThat; 5 | 6 | import org.junit.BeforeClass; 7 | import org.junit.experimental.categories.Category; 8 | 9 | import com.crawljax.browser.EmbeddedBrowser.BrowserType; 10 | import 
com.crawljax.core.CrawlSession; 11 | import com.crawljax.core.configuration.BrowserConfiguration; 12 | import com.crawljax.test.BrowserTest; 13 | 14 | @Category(BrowserTest.class) 15 | public class LargeIETest extends LargeTestBase { 16 | 17 | private static CrawlSession session; 18 | 19 | @BeforeClass 20 | public static void setUpBeforeClass() throws Exception { 21 | assumeThat(System.getProperty("os.name").toLowerCase(), containsString("windows")); 22 | 23 | session = setup(new BrowserConfiguration(BrowserType.INTERNET_EXPLORER), 400, 400); 24 | } 25 | 26 | @Override 27 | protected CrawlSession getSession() { 28 | return session; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/largetests/LargePhantomJSTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.largetests; 2 | 3 | import com.crawljax.browser.EmbeddedBrowser.BrowserType; 4 | import com.crawljax.core.CrawlSession; 5 | import com.crawljax.core.configuration.BrowserConfiguration; 6 | import com.crawljax.test.BrowserTest; 7 | import com.crawljax.test.Utils; 8 | 9 | import org.junit.BeforeClass; 10 | import org.junit.experimental.categories.Category; 11 | 12 | @Category(BrowserTest.class) 13 | public class LargePhantomJSTest extends LargeTestBase { 14 | 15 | private static CrawlSession session; 16 | 17 | @BeforeClass 18 | public static void setUpBeforeClass() throws Exception { 19 | Utils.assumeBinary("phantomjs.binary.path", "phantomjs"); 20 | 21 | session = setup(new BrowserConfiguration(BrowserType.PHANTOMJS, 1), 200, 200); 22 | } 23 | 24 | @Override 25 | protected CrawlSession getSession() { 26 | return session; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/state/ElementTest.java: -------------------------------------------------------------------------------- 
1 | package com.crawljax.core.state; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.junit.Assert.assertThat; 5 | 6 | import java.io.IOException; 7 | 8 | import org.apache.commons.lang3.SerializationUtils; 9 | import org.junit.Test; 10 | import org.w3c.dom.Node; 11 | import org.xml.sax.SAXException; 12 | 13 | public class ElementTest { 14 | 15 | @Test 16 | public void testSerializability() throws SAXException, IOException { 17 | String HTML = 18 | " " 19 | + "" 20 | + "
" 21 | + "test
"; 22 | StateVertex sv = new StateVertexImpl(0, "test", HTML); 23 | 24 | Node node = sv.getDocument().getElementById("thea"); 25 | Element element = new Element(node); 26 | 27 | byte[] serialized = SerializationUtils.serialize(element); 28 | Element deserializedElement = (Element) SerializationUtils.deserialize(serialized); 29 | assertThat(element, is(deserializedElement)); 30 | assertThat(element.getElementId(), is(deserializedElement.getElementId())); 31 | 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/state/PostCrawlStateGraphChecker.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.state; 2 | 3 | import static org.hamcrest.core.Is.is; 4 | import static org.hamcrest.core.IsNull.notNullValue; 5 | import static org.hamcrest.number.OrderingComparison.greaterThanOrEqualTo; 6 | import static org.junit.Assert.assertThat; 7 | 8 | import com.crawljax.core.CrawlSession; 9 | import com.crawljax.core.ExitNotifier.ExitStatus; 10 | import com.crawljax.core.plugin.PostCrawlingPlugin; 11 | 12 | /** 13 | * This {@link PostCrawlingPlugin} checks the {@link InMemoryStateFlowGraph} for consistency after 14 | * the crawl is done. 
15 | */ 16 | public class PostCrawlStateGraphChecker implements PostCrawlingPlugin { 17 | 18 | @Override 19 | public void postCrawling(CrawlSession session, ExitStatus status) { 20 | StateFlowGraph stateFlowGraph = session.getStateFlowGraph(); 21 | 22 | allStatesHaveOneOreMoreIncomingEdges(stateFlowGraph); 23 | 24 | allEdgesConnectTwoStates(stateFlowGraph); 25 | } 26 | 27 | private void allStatesHaveOneOreMoreIncomingEdges(StateFlowGraph stateFlowGraph) { 28 | for (StateVertex state : stateFlowGraph.getAllStates()) { 29 | if (stateFlowGraph.getInitialState().getId() != state.getId()) { 30 | assertThat(stateFlowGraph.getIncomingClickable(state).size(), 31 | is(greaterThanOrEqualTo(1))); 32 | } 33 | } 34 | } 35 | 36 | private void allEdgesConnectTwoStates(StateFlowGraph stateFlowGraph) { 37 | for (Eventable eventable : stateFlowGraph.getAllEdges()) { 38 | assertThat(eventable.getSourceStateVertex(), is(notNullValue())); 39 | assertThat(eventable.getTargetStateVertex(), is(notNullValue())); 40 | } 41 | } 42 | 43 | @Override 44 | public String toString() { 45 | return this.getClass().getSimpleName(); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/core/state/StatesContainElementsTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.core.state; 2 | 3 | import static org.hamcrest.collection.IsEmptyCollection.empty; 4 | import static org.hamcrest.core.Is.is; 5 | import static org.hamcrest.core.IsNot.not; 6 | import static org.junit.Assert.assertThat; 7 | 8 | import java.util.Set; 9 | 10 | import org.eclipse.jetty.util.resource.Resource; 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | import org.junit.experimental.categories.Category; 14 | 15 | import com.crawljax.core.CrawlSession; 16 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; 17 | import 
com.crawljax.test.BaseCrawler; 18 | import com.crawljax.test.BrowserTest; 19 | 20 | @Category(BrowserTest.class) 21 | public class StatesContainElementsTest { 22 | 23 | private CrawlSession crawl; 24 | 25 | @Before 26 | public void setup() { 27 | crawl = new BaseCrawler(Resource.newClassPathResource("demo-site")) { 28 | @Override 29 | protected CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() { 30 | return super.newCrawlConfigurationBuilder() 31 | .setMaximumStates(2); 32 | } 33 | }.crawl(); 34 | } 35 | 36 | @Test 37 | public void whenCrawledTheStateVertexesContainEvents() { 38 | Set allStates = crawl.getStateFlowGraph().getAllStates(); 39 | for (StateVertex stateVertex : allStates) { 40 | if ("index".equals(stateVertex.getName())) { 41 | assertThat(stateVertex.getCandidateElements(), is(not(empty()))); 42 | } 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/crawls/CrawlWithCustomScopeTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.crawls; 2 | 3 | import static org.hamcrest.core.Is.is; 4 | import static org.hamcrest.core.IsCollectionContaining.hasItems; 5 | import static org.junit.Assert.assertThat; 6 | 7 | import java.net.URI; 8 | import java.util.HashSet; 9 | import java.util.Set; 10 | 11 | import org.junit.Test; 12 | import org.junit.experimental.categories.Category; 13 | 14 | import com.crawljax.core.CrawlSession; 15 | import com.crawljax.core.configuration.CrawlScope; 16 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder; 17 | import com.crawljax.core.state.StateVertex; 18 | import com.crawljax.test.BaseCrawler; 19 | import com.crawljax.test.BrowserTest; 20 | 21 | @Category(BrowserTest.class) 22 | public class CrawlWithCustomScopeTest { 23 | 24 | @Test 25 | public void crawlsPagesOnlyInCustomScope() throws Exception { 26 | CrawlScope crawlScope = 27 | 
url -> url.contains("in_scope") || url.endsWith("crawlscope/index.html"); 28 | BaseCrawler baseCrawler = new BaseCrawler("crawlscope") { 29 | @Override 30 | public CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() { 31 | CrawljaxConfigurationBuilder builder = 32 | super.newCrawlConfigurationBuilder(); 33 | builder.setCrawlScope(crawlScope); 34 | return builder; 35 | } 36 | }; 37 | 38 | CrawlSession crawlSession = baseCrawler.crawl(); 39 | 40 | URI baseUrl = baseCrawler.getWebServer().getSiteUrl(); 41 | Set crawledUrls = new HashSet<>(); 42 | for (StateVertex state : crawlSession.getStateFlowGraph().getAllStates()) { 43 | crawledUrls.add(state.getUrl()); 44 | } 45 | 46 | assertThat(crawledUrls, hasItems( 47 | baseUrl + "crawlscope", 48 | baseUrl + "crawlscope/in_scope.html", 49 | baseUrl + "crawlscope/in_scope_inner.html")); 50 | assertThat(crawledUrls.size(), is(3)); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/forms/RandomInputValueGeneratorTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.forms; 2 | 3 | import static org.hamcrest.core.Is.is; 4 | import static org.junit.Assert.assertThat; 5 | 6 | import java.util.HashSet; 7 | import java.util.Set; 8 | 9 | import org.junit.Before; 10 | import org.junit.Test; 11 | 12 | import com.google.common.collect.Lists; 13 | 14 | public class RandomInputValueGeneratorTest { 15 | private static final int NUM_RAND_CHECKS = 1000; 16 | private static final int LENGTH_SHORT = 1; 17 | private static final int LENGTH_MEDIUM = 15; 18 | private static final int LENGTH_LONG = 150; 19 | 20 | private RandomInputValueGenerator generator; 21 | 22 | @Before 23 | public void setup() { 24 | generator = new RandomInputValueGenerator(); 25 | } 26 | 27 | @Test 28 | public void randomValuesAreUnique() { 29 | Set set = new HashSet<>(); 30 | for (int i = 0; i < NUM_RAND_CHECKS; i++) 
{ 31 | assertThat(set.add(generator.getRandomString(LENGTH_MEDIUM)), is(true)); 32 | } 33 | } 34 | 35 | @Test 36 | public void testLengthSpecification() { 37 | assertThat(generator.getRandomString(LENGTH_SHORT).length(), is(LENGTH_SHORT)); 38 | assertThat(generator.getRandomString(LENGTH_MEDIUM).length(), is(LENGTH_MEDIUM)); 39 | assertThat(generator.getRandomString(LENGTH_LONG).length(), is(LENGTH_LONG)); 40 | } 41 | 42 | @Test(expected = IllegalArgumentException.class) 43 | public void randomOptionDoesntAcceptEmptyLists() { 44 | generator.getRandomItem(Lists.newArrayList()); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/test/BrowserTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.test; 2 | 3 | /** 4 | * Marker interface for a test that requires a Browser 5 | */ 6 | public interface BrowserTest { 7 | 8 | } 9 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/test/Utils.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.test; 2 | 3 | import static org.hamcrest.core.Is.is; 4 | import static org.junit.Assume.assumeThat; 5 | 6 | import java.io.IOException; 7 | 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | /** 12 | * Utilities for tests. 
13 | */ 14 | public final class Utils { 15 | 16 | private static final Logger LOG = LoggerFactory.getLogger(Utils.class); 17 | 18 | private Utils() { 19 | } 20 | 21 | public static void assumeBinary(String systemProperty, String binaryName) 22 | throws Exception { 23 | assumeThat(System.getProperty(systemProperty) != null 24 | || isOnClassPath(binaryName), is(true)); 25 | } 26 | 27 | private static boolean isOnClassPath(String binaryName) 28 | throws IOException, InterruptedException { 29 | try { 30 | if (!System.getProperty("os.name").startsWith("Windows")) { 31 | Process exec = Runtime.getRuntime().exec("which " + binaryName); 32 | boolean found = exec.waitFor() == 0; 33 | LOG.info("Found {} on the classpath = {}", binaryName, found); 34 | return found; 35 | } 36 | } catch (RuntimeException e) { 37 | LOG.info("Could not determine if {} is on the classpath: {}", binaryName, 38 | e.getMessage()); 39 | } 40 | return false; 41 | } 42 | } -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/test/WebServer.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.test; 2 | 3 | import java.net.URI; 4 | 5 | import com.google.common.base.Preconditions; 6 | import org.eclipse.jetty.server.Server; 7 | import org.eclipse.jetty.server.ServerConnector; 8 | import org.eclipse.jetty.server.handler.ResourceHandler; 9 | import org.eclipse.jetty.util.resource.Resource; 10 | 11 | public class WebServer { 12 | private final Resource resource; 13 | 14 | private int port; 15 | private URI demoSite; 16 | private Server server; 17 | private boolean started; 18 | 19 | /** 20 | * @param classPathResource 21 | * The name of the resource. This resource must be on the test or regular classpath. 
22 | */ 23 | public WebServer(Resource classPathResource) { 24 | resource = classPathResource; 25 | } 26 | 27 | public void start() throws Exception { 28 | server = new Server(0); 29 | ResourceHandler handler = new ResourceHandler(); 30 | handler.setBaseResource(resource); 31 | server.setHandler(handler); 32 | server.start(); 33 | this.port = ((ServerConnector) server.getConnectors()[0]).getLocalPort(); 34 | this.demoSite = URI.create("http://localhost:" + port + "/"); 35 | this.started = true; 36 | } 37 | 38 | public URI getSiteUrl() { 39 | checkServerStarted(); 40 | return demoSite; 41 | } 42 | 43 | public int getPort() { 44 | checkServerStarted(); 45 | return port; 46 | } 47 | 48 | public void stop() { 49 | checkServerStarted(); 50 | try { 51 | server.stop(); 52 | } catch (Exception e) { 53 | throw new RuntimeException("Could not stop the server", e); 54 | } 55 | } 56 | 57 | private void checkServerStarted() { 58 | Preconditions.checkState(started, "Server not started"); 59 | } 60 | 61 | public void join() throws InterruptedException { 62 | server.join(); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/test/WebServerTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.test; 2 | 3 | 4 | import static org.hamcrest.Matchers.*; 5 | import static org.junit.Assert.assertThat; 6 | 7 | import java.io.IOException; 8 | import java.net.URI; 9 | 10 | import com.crawljax.core.CrawljaxException; 11 | import org.eclipse.jetty.util.resource.Resource; 12 | import org.junit.After; 13 | import org.junit.Before; 14 | import org.junit.Test; 15 | 16 | public class WebServerTest { 17 | private URI site; 18 | private WebServer server; 19 | 20 | private static final int MAX_PORT = 65535; 21 | private static final int MIN_PORT = 0; 22 | 23 | @Before 24 | public void setup() throws Exception { 25 | site = 
BaseCrawler.class.getResource("/site").toURI(); 26 | try { 27 | server = new WebServer(Resource.newResource(site)); 28 | } 29 | catch (IOException e) { 30 | throw new CrawljaxException("Could not load resource", e); 31 | } 32 | server.start(); 33 | } 34 | 35 | @After 36 | public void stopServer() { 37 | server.stop(); 38 | } 39 | 40 | @Test 41 | public void testPort() throws Exception { 42 | assertThat(server.getPort(), is(lessThanOrEqualTo(MAX_PORT))); 43 | assertThat(server.getPort(), is(greaterThanOrEqualTo(MIN_PORT))); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/test/matchers/FileMatcher.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.test.matchers; 2 | 3 | import java.io.File; 4 | 5 | import org.hamcrest.Description; 6 | import org.hamcrest.Factory; 7 | import org.hamcrest.Matcher; 8 | import org.hamcrest.TypeSafeMatcher; 9 | 10 | public class FileMatcher extends TypeSafeMatcher { 11 | 12 | @Override 13 | public boolean matchesSafely(File file) { 14 | return file.exists(); 15 | } 16 | 17 | @Override 18 | public void describeTo(Description description) { 19 | description.appendText("file which exists"); 20 | } 21 | 22 | @Factory 23 | public static Matcher exists() { 24 | return new FileMatcher(); 25 | } 26 | 27 | } -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/util/DOMComparerTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.util; 2 | 3 | import static org.hamcrest.core.Is.is; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertNotNull; 6 | import static org.junit.Assert.assertThat; 7 | 8 | import java.io.IOException; 9 | import java.util.List; 10 | 11 | import org.custommonkey.xmlunit.Difference; 12 | import 
org.hamcrest.collection.IsEmptyCollection; 13 | import org.junit.Test; 14 | import org.w3c.dom.Document; 15 | 16 | /** 17 | * Test the comparisons between two Documents. 18 | * 19 | * @author Singla 20 | */ 21 | 22 | public class DOMComparerTest { 23 | 24 | @Test 25 | public void compareNoDifference() throws IOException { 26 | String html = "

No difference

"; 27 | 28 | Document control = DomUtils.asDocument(html); 29 | assertNotNull(control); 30 | 31 | Document test = DomUtils.asDocument(html); 32 | assertNotNull(test); 33 | 34 | DOMComparer dc = new DOMComparer(control, test); 35 | 36 | List differences = dc.compare(); 37 | assertThat(differences, is(IsEmptyCollection.empty())); 38 | } 39 | 40 | @Test 41 | public void comparePartialDifference() throws IOException { 42 | String controlHTML = 43 | "
Crawljax

There are differences

"; 44 | String testHTML = 45 | "Crawljax

There are differences."; 46 | final int EXPECTED_DIFF = 7; 47 | 48 | Document control = DomUtils.asDocument(controlHTML); 49 | assertNotNull(control); 50 | 51 | Document test = DomUtils.asDocument(testHTML); 52 | assertNotNull(test); 53 | 54 | DOMComparer dc = new DOMComparer(control, test); 55 | 56 | List differences = dc.compare(); 57 | assertEquals("Error: Did not find 5 differences", differences.size(), EXPECTED_DIFF); 58 | 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/util/DomUtilsBrowserTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.util; 2 | 3 | import static org.junit.Assert.assertNotNull; 4 | 5 | import java.io.IOException; 6 | import java.net.URISyntaxException; 7 | import java.net.URL; 8 | 9 | import com.crawljax.browser.BrowserProvider; 10 | import com.crawljax.browser.EmbeddedBrowser; 11 | import com.crawljax.test.BrowserTest; 12 | import org.junit.Before; 13 | import org.junit.Rule; 14 | import org.junit.Test; 15 | import org.junit.experimental.categories.Category; 16 | import org.w3c.dom.Document; 17 | import org.xml.sax.SAXException; 18 | 19 | /** 20 | * Test for the Helper class. 21 | */ 22 | @Category(BrowserTest.class) 23 | public class DomUtilsBrowserTest { 24 | 25 | 26 | @Rule 27 | public BrowserProvider provider = new BrowserProvider(); 28 | 29 | private EmbeddedBrowser browser; 30 | 31 | @Before 32 | public void before() throws URISyntaxException { 33 | browser = provider.newEmbeddedBrowser(); 34 | URL url = DomUtilsBrowserTest.class.getResource("/site/index.html"); 35 | browser.goToUrl(url.toURI()); 36 | } 37 | 38 | /** 39 | * Test get document from browser function. 
40 | */ 41 | @Test 42 | public void testGetDocumentFromBrowser() throws SAXException, IOException { 43 | 44 | String html = browser.getStrippedDom(); 45 | assertNotNull(html); 46 | Document doc = DomUtils.asDocument(html); 47 | assertNotNull(doc); 48 | 49 | browser.close(); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/util/HtmlNamespaceTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.util; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertTrue; 5 | 6 | import javax.xml.XMLConstants; 7 | 8 | import org.junit.Test; 9 | 10 | public class HtmlNamespaceTest { 11 | 12 | @Test 13 | public void testgetNamespaceURI() { 14 | HtmlNamespace testNamespace = new HtmlNamespace(); 15 | 16 | String testPrefix = null; 17 | boolean testPass = false; 18 | 19 | try { 20 | testNamespace.getNamespaceURI(testPrefix); 21 | } catch (NullPointerException e) { 22 | testPass = true; 23 | } 24 | assertTrue(testPass); 25 | 26 | assertEquals(XMLConstants.DEFAULT_NS_PREFIX, testNamespace.getNamespaceURI("gibberish")); 27 | 28 | assertEquals("http://www.w3.org/1999/xhtml", testNamespace.getNamespaceURI("html")); 29 | 30 | assertEquals(XMLConstants.XML_NS_URI, testNamespace.getNamespaceURI("xml")); 31 | } 32 | 33 | } -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/util/XMLObjectTest.java: -------------------------------------------------------------------------------- 1 | package com.crawljax.util; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertTrue; 5 | import static org.junit.Assert.fail; 6 | 7 | import java.io.File; 8 | import java.io.FileNotFoundException; 9 | import java.util.ArrayList; 10 | 11 | import org.junit.Test; 12 | 13 | public class XMLObjectTest { 14 | 15 | private final 
static String filename = "xmlobject-save-to-file-test.xml"; 16 | 17 | @SuppressWarnings("unchecked") 18 | @Test 19 | public void saveToFile() { 20 | ArrayList object = new ArrayList(); 21 | object.add("Bla"); 22 | object.add("Something else"); 23 | 24 | try { 25 | XMLObject.objectToXML(object, filename); 26 | } catch (FileNotFoundException e) { 27 | fail("Error saving object"); 28 | } 29 | File f = new File(filename); 30 | assertTrue(f.exists()); 31 | 32 | object = null; 33 | try { 34 | object = (ArrayList) XMLObject.xmlToObject(filename); 35 | } catch (FileNotFoundException e) { 36 | fail("File not found"); 37 | } 38 | 39 | assertEquals(2, object.size()); 40 | assertEquals("Bla", object.get(0)); 41 | assertEquals("Something else", object.get(1)); 42 | 43 | assertTrue(f.delete()); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /core/src/test/java/com/crawljax/util/XmlunitDifferenceTest.java: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 2 | 3 | package com.crawljax.util; 4 | 5 | import java.util.List; 6 | 7 | import org.custommonkey.xmlunit.Difference; 8 | import org.junit.Assert; 9 | import org.junit.Test; 10 | 11 | import com.google.common.collect.Lists; 12 | 13 | /** 14 | * Test the useage of the Helper.getDifferences. 
15 | * 16 | * @author slenselink@google.com (Stefan Lenselink) 17 | */ 18 | public class XmlunitDifferenceTest { 19 | 20 | @Test 21 | public void testEmptyDoms() { 22 | String left = ""; 23 | String right = ""; 24 | List l = DomUtils.getDifferences(left, right); 25 | Assert.assertEquals(0, l.size()); 26 | } 27 | 28 | @Test 29 | public void testSameIdenticalDoms() { 30 | String left = ""; 31 | String right = ""; 32 | List l = DomUtils.getDifferences(left, right); 33 | Assert.assertEquals(0, l.size()); 34 | } 35 | 36 | @Test 37 | public void testSameDomsArrtibutesSame() { 38 | String left = ""; 39 | String right = ""; 40 | List l = DomUtils.getDifferences(left, right); 41 | Assert.assertEquals(0, l.size()); 42 | } 43 | 44 | @Test 45 | public void testSameDomsArrtibutesFiltered() { 46 | String left = ""; 47 | String right = ""; 48 | List l = DomUtils.getDifferences(left, right, Lists.newArrayList("value")); 49 | Assert.assertEquals(0, l.size()); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /core/src/test/resources/candidateElementExtractorTest/domWithFourTypeDownloadLink.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Simple page 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /core/src/test/resources/candidateElementExtractorTest/domWithOneExternalAndTwoInternal.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /core/src/test/resources/configuration/crawljax.properties: -------------------------------------------------------------------------------- 1 | ### select the embedded browser: 2 | ### webdriver.ie (Note: supports only onclick) 3 | 
### webdriver.firefox (Note: supports only onclick) 4 | browser = webdriver.firefox 5 | 6 | ### The URL of the site to be crawled 7 | site.url = http://spci.st.ewi.tudelft.nl/demo/wishlist/ 8 | 9 | ### The events generated on DOM elements: onclick, onmouseover, onblur, onkeydown... 10 | robot.events = onclick 11 | 12 | ### The tag elements that build up the candidate elements: 13 | ### FORMAT: tag-name:{(attr-name=attr-value+;? ?)*} 14 | ### example: a:{attr=value}, div:{class=aha; id=room}, span:{} 15 | crawl.tags = a:{class=remotetop} 16 | 17 | ### The tag elements that should be excluded from crawling 18 | crawl.tags.exclude = a:{class=remoteleft} 19 | 20 | # click each element only once, 1 means click only once. 21 | # 0 means click on every new state. 22 | click.once = 1 23 | 24 | ### The depth level of the breadth-first search 25 | crawl.depth = 5 26 | 27 | ### The number of ms to wait for initialization 28 | crawl.wait.reload = 400 29 | 30 | ### The number of ms (e.g., 1000) to wait after an event 31 | crawl.wait.event = 400 32 | 33 | ### max number of states - set 0 to ignore 34 | crawl.max.states = 0 35 | 36 | ### max crawling time (seconds) - set 0 to ignore 37 | crawl.max.runtime = 0 38 | 39 | ### enter/choose random input values for form elements - set 1 to enable 40 | crawl.forms.randominput = 1 41 | 42 | ### number of threads used for crawling (support for multi-threaded crawling) 43 | crawl.numberOfThreads = 1 44 | 45 | ### The attributes (in regular expression) which should be stripped from the DOM before it is used. 46 | crawl.filter.attributes = closure_hashcode_(\\w)*, jquery[0-9]+ 47 | -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/conditions.html: -------------------------------------------------------------------------------- 1 |

Conditions

2 | 3 | TODO -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/crawlConditions.html: -------------------------------------------------------------------------------- 1 |

CrawlConditions

2 | 3 |

Crawljax has CrawlConditions which can be used to guide the crawling with a dynamic approach. When Crawljax finds a new state, it only crawls it when all the CrawlConditions are satisfied. If no CrawlConditions are specified, all the states are crawled.

4 | 5 |

Methods in CrawlSpecification

6 |

7 | 8 | addCrawlCondition(String description, Condition crawlCondition)
9 | addCrawlCondition(String description, Condition crawlCondition, Condition... preConditions) 10 |
11 |

12 | 13 |

Example 1

14 |

Crawljax should only crawl pages with the text foo in the URL.

15 |
16 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
17 | ...
18 | UrlCondition onlyFooDomain = new UrlCondition("foo");
19 | builder.addCrawlCondition("Only crawl foo site", onlyFooDomain);
20 | 
21 | 22 |

Example 2

23 |

Crawljax should never crawl a page with a span with the class 'foo'.

24 |
25 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
26 | ...
27 | NotXPathCondition noFooClass = new NotXPathCondition("//SPAN[@class='foo']");
28 | builder.addCrawlCondition("No spans with foo as class", noFooClass);
29 | 
30 | 31 |

This page contains a foo and should therefore not be crawled by Crawljax. Thus this link should not be clicked by Crawljax

32 | 33 | -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/customInput.html: -------------------------------------------------------------------------------- 1 |

Forms

2 |

Crawljax can fill in custom values in input elements

3 |
4 |

Contact

5 | 6 | 7 | 8 | 9 | 10 | 15 | 16 |
Gender:Male Female
Name:
Phone:
Mobile:
Type:
Active:
17 |

18 | 19 | 20 |

21 | 22 |
23 |

Example Code

24 |
25 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
26 | ...
27 | InputSpecification input = new InputSpecification();
28 | input.field("male").setValue(true);
29 | input.field("name").setValue("Bob");
30 | input.fields("phone", "mobile").setValue("1234567890");
31 | input.field("type").setValue("Student");
32 | input.field("active").setValue(true);
33 | ...
34 | builder.setInputSpecification(input);
35 | 
36 |
37 | -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/forms.html: -------------------------------------------------------------------------------- 1 |

Forms

2 |

Crawljax can fill in multiple values in forms

3 |
4 |

Contact

5 | 6 | 7 | 8 | 9 | 14 | 15 |
Gender:Male Female
Name:
Phone:
Type:
Active:
16 |

17 | 18 | 19 |

20 | 21 |
22 |

Example Code

23 |
24 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
25 | ...
26 | InputSpecification input = new InputSpecification();
27 | Form contactForm = new Form();
28 | contactForm.field("male").setValues(true, false);
29 | contactForm.field("female").setValues(false, true);
30 | contactForm.field("name").setValues("Bob", "Alice", "John");
31 | contactForm.field("phone").setValues("1234567890", "1234888888", "");
32 | contactForm.field("type").setValues("Student", "Teacher");
33 | contactForm.field("active").setValues(true);
34 | input.setValuesInForm(contactForm).beforeClickElement("button").withText("Save");
35 | ...
36 | builder.setInputSpecification(input);
37 | 
38 |
39 | -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/img/nav.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zaproxy/crawljax/4ac51f5f16bfab25edcc40abeb0b0233aa5798bc/core/src/test/resources/demo-site/img/nav.gif -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/info.html: -------------------------------------------------------------------------------- 1 |

Information

2 | 3 |

Crawling Ajax applications through dynamic analysis and reconstruction of the UI state changes. Crawljax is based on a method which dynamically builds a `state-flow graph' modeling the various navigation paths and states within an Ajax application.

4 | See more here -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/invariants.html: -------------------------------------------------------------------------------- 1 |

Invariants

2 |

Invariants can be used to perform tests on the current state

3 | 4 |

Methods in CrawlSpecification

5 |

6 | 7 | addInvariant(String description, Condition condition)
8 | addInvariant(String description, Condition condition, Condition...preConditions) 9 |

10 |

11 | 12 |

Example 1

13 |

Use the generic conditions

14 |
15 | addInvariant("No error messages", new NotRegexCondition("Error [0-9]+"));
16 | 
17 | 18 |

Example 2

19 |

Create your own condition

20 |
21 | crawler.addInvariant("Test count myList", new ConditionAbstract(){
22 | 
23 |   @Override
24 |     public boolean check(EmbeddedBrowser browser) {
25 |       WebDriver driver = browser.getDriver();
26 |       try{
27 |         WebElement myList = driver.findElement(By.id("myList"));
28 |         return new Select(myList).getOptions().size() > 0;
29 |       }catch(NoSuchElementException e){
30 |         //not found
31 |         return true;
32 |       }
33 |     }
34 | });
35 | 
36 | -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/js/general.js: -------------------------------------------------------------------------------- 1 | $(document).ready(function(){ 2 | openPage("home.html"); 3 | setEventHandlers(); 4 | //addError("how rude"); 5 | //addError("how kewl"); 6 | }); 7 | 8 | function setEventHandlers(){ 9 | $('#info').click(function(){ openPage("info.html") } ); 10 | $('#papers').click(function(){ openPage("papers.html") } ); 11 | $('#home').click(function(){ openPage("home.html") } ); 12 | $('#ignore').click(function(){ addError("Crawljax should ignore me") } ); 13 | } 14 | 15 | function openPage(page){ 16 | $('#content').load(page); 17 | } 18 | 19 | function addError(msg){ 20 | $('#errors').show(); 21 | $('#errors').html($('#errors').html() + "

ERROR: " + msg + "

"); 22 | } 23 | 24 | 25 | 26 | function saveContact(msg){ 27 | var content = "

" + msg + "

"; 28 | var gender = (document.getElementById("male").checked ? "male" : "female"); 29 | content += "

Gender: " + gender + "
"; 30 | content += "Name: " + $('#name').val() + "
"; 31 | content += "Phone: " + $('#phone').val() + "
"; 32 | content += "Mobile: " + $('#mobile').val() + "
"; 33 | content += "Type: " + document.getElementById("type").options[document.getElementById("type").selectedIndex].value + "
"; 34 | content += "Active: " + document.getElementById("active").checked; 35 | content += "

"; 36 | $('#content').html(content); 37 | } 38 | 39 | function afterRandomInput(){ 40 | var content = "

filled in random values

"; 41 | content += "text: " + document.getElementById("text").value + "
"; 42 | content += "checkbox: " + document.getElementById("checkbox").checked + "
"; 43 | content += "radio: " + document.getElementById("radio").checked + "
"; 44 | content += "Select: " + document.getElementById("select").value; 45 | content += "

"; 46 | 47 | $('#content').html(content); 48 | 49 | } 50 | -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/oracleComparators.html: -------------------------------------------------------------------------------- 1 |

Oracle Comparators

2 | 3 |

Methods in CrawlSpecification

4 |

5 | 6 | addOracleComparator(String id, Oracle oracleComparator)
7 | addOracleComparator(String id, Oracle oracleComparator, Condition...preConditions) 8 |

9 | The argument id is used to identify the oracle comparators easily. For example, there could be multiple AttributeOracle comparators. 10 |

11 | 12 |

Example

13 |

Example of an Oracle Comparator

14 |
15 | public class IgnoreCasingOracle extends OracleAbstract {
16 | 
17 |   @Override
18 |   public boolean isEquivalent() {
19 |     return getOriginalDom().equalsIgnoreCase(getNewDom());
20 |   }
21 | }
22 | 
23 |

Adding to the CrawlSpecification

24 |

25 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
26 | ...
27 | UrlCondition onFooPage = new UrlCondition("#foo");
28 | builder.addOracleComparator("Ignore casing on foo pages", new IgnoreCasingOracle())
29 | 
30 | 31 |

Oracle Comparator Pipelining

32 | TODO 33 | -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/papers.html: -------------------------------------------------------------------------------- 1 |

Papers

2 | The following papers related to Crawljax are published: 3 | 9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/demo-site/randomInput.html: -------------------------------------------------------------------------------- 1 |

Random Input

2 | 3 |

Crawljax enters random input while crawling if no input values are specified

4 | 5 |

When Crawljax finds form input elements in a state, it sets a random value in the elements:

6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 37 | 38 | 39 |
TypeExampleRandom value
Text fields
14 | 15 |
Random string of 8 alpha characters
CheckboxesChecks with p < 0.5
Radio buttonsChecks with p < 0.5
ListsUniformly selects a random item from the list
40 |

41 | 42 |

Next
-------------------------------------------------------------------------------- /core/src/test/resources/demo-site/waitConditions.html: -------------------------------------------------------------------------------- 1 |

WaitConditions

2 | 3 |

Websites can have widgets that load relatively slowly, which can cause flakiness in the crawling or while testing. With WaitConditions the user can specify for which URL (or part of the URL) Crawljax should wait for certain elements or widgets to become visible.

4 | 5 |

Methods in CrawlSpecification

6 |

7 | 8 | waitFor(String url, ExpectedCondition...expectedConditions)
9 | waitFor(String url, int timeout, ExpectedCondition...expectedConditions) 10 |
11 |

12 | 13 | 14 |

Example

15 |

Wait for a list to be loaded on the #contact page.

16 |
17 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
18 | ...
19 | builder.waitFor("#contact", new ExpectedVisibleCondition(By.id("contactList")));
20 | 
21 | 22 |

Note: currently only supported when WebDriver is used

23 | -------------------------------------------------------------------------------- /core/src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /core/src/test/resources/realm.properties: -------------------------------------------------------------------------------- 1 | test: MD5:438E713A9EE9256F50816C1DACECCD9C, user -------------------------------------------------------------------------------- /core/src/test/resources/site/concurrentcrawl1/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index ABC 5 | 6 | 7 |

Index ABC

8 |

Pages ABC

9 | A 10 | B 11 | C 12 | 13 | -------------------------------------------------------------------------------- /core/src/test/resources/site/concurrentcrawl1/page_a.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | A 5 | 6 | 7 |

Page A

8 |

Page A.

9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/site/concurrentcrawl1/page_b.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | B 5 | 6 | 7 |

Page B

8 |

Page B.

9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/site/concurrentcrawl1/page_c.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | C 5 | 6 | 7 |

Page C

8 |

Page C.

9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/site/concurrentcrawl2/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index XYZ 5 | 6 | 7 |

Index XYZ

8 |

Pages XYZ

9 | X 10 | Y 11 | Z 12 | 13 | -------------------------------------------------------------------------------- /core/src/test/resources/site/concurrentcrawl2/page_x.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | X 5 | 6 | 7 |

Page X

8 |

Page X.

9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/site/concurrentcrawl2/page_y.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Y 5 | 6 | 7 |

Page Y

8 |

Page Y.

9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/site/concurrentcrawl2/page_z.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Z 5 | 6 | 7 |

Page Z

8 |

Page Z.

9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawlconditions/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index 5 | 6 | 7 |
8 |

Index

9 |

This is the index. All other pages should not be crawled due to 10 | crawl conditions.

11 |
12 | 13 | 14 | 15 |
This is the container. Text and html will be 16 | loaded here.
17 | 18 |

19 | 20 | 29 | 30 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |
7 | 17 | S1
18 | S2
19 |
20 | 21 | 22 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/payload_10.html: -------------------------------------------------------------------------------- 1 | 8 | S10
9 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/payload_11.html: -------------------------------------------------------------------------------- 1 | Final state S10 -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/payload_2.html: -------------------------------------------------------------------------------- 1 | Final state S2 -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/payload_3.html: -------------------------------------------------------------------------------- 1 | 11 | S3
12 | S6
13 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/payload_4.html: -------------------------------------------------------------------------------- 1 | 8 | S4
9 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/payload_5.html: -------------------------------------------------------------------------------- 1 | 8 | S5
9 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/payload_6.html: -------------------------------------------------------------------------------- 1 | Final state S5 -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/payload_7.html: -------------------------------------------------------------------------------- 1 | 11 | S7
12 | S9
13 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/payload_8.html: -------------------------------------------------------------------------------- 1 | 8 | S8
9 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawler/payload_9.html: -------------------------------------------------------------------------------- 1 | Final state S8 -------------------------------------------------------------------------------- /core/src/test/resources/site/crawlscope/in_scope.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | In Scope 5 | 6 | 7 |

In Scope

8 |

This page can be accessed and crawled.

9 | inner page 10 | 11 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawlscope/in_scope_inner.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | In Scope Inner Page 5 | 6 | 7 |

In Scope Inner Page

8 |

This page should be accessed.

9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawlscope/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index Scope Test 5 | 6 | 7 |

Index Scope Test

8 |

Pages that are in and out of crawl scope.

9 | out of scope 10 | in scope 11 | 12 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawlscope/out_of_scope.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Out of Scope 5 | 6 | 7 |

Out of Scope

8 |

This page can be accessed but not crawled.

9 | inner page 10 | 11 | -------------------------------------------------------------------------------- /core/src/test/resources/site/crawlscope/out_of_scope_inner.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Out of Scope Inner Page 5 | 6 | 7 |

Out of Scope Inner Page

8 |

This page should not be accessed.

9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/site/download/download.blob: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zaproxy/crawljax/4ac51f5f16bfab25edcc40abeb0b0233aa5798bc/core/src/test/resources/site/download/download.blob -------------------------------------------------------------------------------- /core/src/test/resources/site/download/download.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Simple page 4 | 5 | 6 |

Simple download page

7 | Download the jquery file 8 | This link should be clicked in stead of being stuck on the previous download link. 9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/site/formhandler/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index Form Handler Test 5 | 6 | 7 |

Index Form Handler Test

8 | 9 | 10 | -------------------------------------------------------------------------------- /core/src/test/resources/site/hidden-elements-site/a.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Index 4 | 5 | 6 |

This is A

7 | 8 | -------------------------------------------------------------------------------- /core/src/test/resources/site/hidden-elements-site/b.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | B 4 | 5 | 6 |

This is B

7 | Go to C 8 | 9 | -------------------------------------------------------------------------------- /core/src/test/resources/site/hidden-elements-site/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Index 4 | 5 | 6 |
7 |

Index

8 |

This is the index and it shows two more sites:

9 |
    10 |
  • A is clickable as an anchor with an href leading to a.html
  • 11 |
  • B is clickable as an anchor but without an href. It has a JavaScript function attached that directs you to B.
  • 12 |
13 |
14 |

Hover me to see the links

15 | 21 |
22 |
23 | 24 | 38 | 39 | -------------------------------------------------------------------------------- /core/src/test/resources/site/home.html: -------------------------------------------------------------------------------- 1 | HOMEPAGE 2 | This state changes every time, but should be added only once because of the oracle comparators 3 | 4 |
Random style
5 |
-------------------------------------------------------------------------------- /core/src/test/resources/site/iframe/iframe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Iframe 4 | 5 | 6 | 7 | 13 | 14 | 15 | 16 | 17 |

This content is from the iframe.

18 | 19 | 20 | load-c2

21 | load-c3

22 |
23 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /core/src/test/resources/site/iframe/iframe2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Iframe 2 4 | 5 | 6 | 7 | 16 | 17 | 18 | 19 | 20 |

This content is from the iframe.

21 | 22 | load-c8

23 | load-c9

24 | 25 | 27 | 28 |
29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /core/src/test/resources/site/iframe/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Window with iframe 4 | 5 | 7 | 8 | 15 | 20 | 21 |
22 |

This document contains an iframe

23 | load-c1 load-c6 load-c7 29 |
30 | 32 | 34 | 35 |
This is where the content will be
36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /core/src/test/resources/site/iframe/page0-0-0.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Nested IFrame 4 | 5 | 6 | 28 | 29 | 30 | 31 | load-c11 32 | 34 | 36 |
37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /core/src/test/resources/site/iframe/page0-0.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | First iframe 4 | 5 | 21 | 22 | 23 | 24 | 26 | 28 | 22 | 23 | --------------------------------------------------------------------------------